subroutine dlansvd_irl(which,jobu,jobv,m,n,dim,p,neig,maxiter, c aprod,U,ldu,Sigma,bnd,V,ldv,tolin,work,lwork,iwork, c liwork,doption,ioption,info,dparm,iparm) c DLANSVD_IRL: Compute the leading singular triplets of a large c and sparse matrix A by implicitly restarted Lanczos bidiagonalization c with partial reorthogonalization. c c Parameters: c c WHICH: CHARACTER*1. Decides which singular triplets to compute. c If WHICH.EQ.'L' then compute triplets corresponding to the K c largest singular values. c If WHICH.EQ.'S' then compute triplets corresponding to the K c smallest singular values. c JOBU: CHARACTER*1. If JOBU.EQ.'Y' then compute the left singular vectors. c Otherwise the array U is not touched. c JOBV: CHARACTER*1. If JOBV.EQ.'Y' then compute the right singular c vectors. Otherwise the array V is not touched. c M: INTEGER. Number of rows of A. c N: INTEGER. Number of columns of A. c DIM: INTEGER. Dimension of the Krylov subspace. c P: INTEGER. Number of shift per restart. c NEIG: INTEGER. Number of desired singular triplets. c NEIG <= MIN(DIM-P,M,N) c MAXITER: INTEGER. Maximum number of restarts. c APROD: Subroutine defining the linear operator A. c APROD should be of the form: c c SUBROUTINE DAPROD(TRANSA,M,N,X,Y,DPARM,IPARM) c CHARACTER*1 TRANSA c INTEGER M,N,IPARM(*) c DOUBLE PRECISION X(*),Y(*),DPARM(*) c c If TRANSA.EQ.'N' then the function should compute the matrix-vector c product Y = A * X. c If TRANSA.EQ.'T' then the function should compute the matrix-vector c product Y = A^T * X. c The arrays IPARM and DPARM are a means to pass user supplied c data to APROD without the use of common blocks. c U(LDU,KMAX+1): DOUBLE PRECISION array. On return the first K columns of U c will contain approximations to the left singular vectors c corresponding to the K largest or smallest (depending on the c value of WHICH) singular values of A. c On entry the first column of U contains the starting vector c for the Lanczos bidiagonalization. A random starting vector c is used if U is zero. c LDU: INTEGER. Leading dimension of the array U. LDV >= M. c SIGMA(K): DOUBLE PRECISION array. On return Sigma contains approximation c to the K largest or smallest (depending on the c value of WHICH) singular values of A. c BND(K) : DOUBLE PRECISION array. Error estimates on the computed c singular values. The computed SIGMA(I) is within BND(I) c of a singular value of A. c V(LDV,KMAX): DOUBLE PRECISION array. On return the first K columns of V c will contain approximations to the right singular vectors c corresponding to the K largest or smallest (depending on the c value of WHICH) singular values of A. c LDV: INTEGER. Leading dimension of the array V. LDV >= N. c TOLIN: DOUBLE PRECISION. Desired relative accuracy of computed singular c values. The error of SIGMA(I) is approximately c MAX( 16*EPS*SIGMA(1), TOLIN*SIGMA(I) ) c WORK(LWORK): DOUBLE PRECISION array. Workspace of dimension LWORK. c LWORK: INTEGER. Dimension of WORK. c If JOBU.EQ.'N' and JOBV.EQ.'N' then LWORK should be at least c M + N + 10*KMAX + 2*KMAX**2 + 5 + MAX(M,N,4*KMAX+4). c If JOBU.EQ.'Y' or JOBV.EQ.'Y' then LWORK should be at least c M + N + 10*KMAX + 5*KMAX**2 + 4 + c MAX(3*KMAX**2+4*KMAX+4, NB*MAX(M,N)), where NB>0 is a block c size, which determines how large a fraction of the work in c setting up the singular vectors is done using fast BLAS-3 c operation. c IWORK: INTEGER array. Integer workspace of dimension LIWORK. c LIWORK: INTEGER. Dimension of IWORK. Should be at least 8*KMAX if c JOBU.EQ.'Y' or JOBV.EQ.'Y' and at least 2*KMAX+1 otherwise. c DOPTION: DOUBLE PRECISION array. Parameters for LANBPRO. c doption(1) = delta. Level of orthogonality to maintain among c Lanczos vectors. c doption(2) = eta. During reorthogonalization, all vectors with c with components larger than eta along the latest Lanczos vector c will be purged. c doption(3) = anorm. Estimate of || A ||. c doption(4) = min relgap. Smallest relgap allowed between any shift c the smallest requested Ritz value. c c IOPTION: INTEGER array. Parameters for LANBPRO. c ioption(1) = CGS. If CGS.EQ.1 then reorthogonalization is done c using iterated classical Gram-Schmidt. IF CGS.EQ.0 then c reorthogonalization is done using iterated modified Gram-Schmidt. c ioption(2) = ELR. If ELR.EQ.1 then extended local orthogonality is c enforced among u_{k}, u_{k+1} and v_{k} and v_{k+1} respectively. c c INFO: INTEGER. c INFO = 0 : The K largest or smallest (depending on the c value of WHICH) singular triplets were computed c succesfully. c INFO = J>0, J<K: An invariant subspace of dimension J was found. c INFO = -1 : K singular triplets did not converge within KMAX c iterations. c DPARM: DOUBLE PRECISION array. Array used for passing data to the APROD c function. c IPARM: INTEGER array. Array used for passing data to the APROD c function. c c c (C) Rasmus Munk Larsen, Stanford University, 2000,2004 c c %-----------% c | Arguments | c %-----------% implicit none include 'stat.h' character*1 which,jobu,jobv integer m,n,p,neig,maxiter,ldu,ldv,iter,liwork integer iwork(liwork),lwork,info,ioption(*) double precision U(ldu,*),V(ldv,*),Sigma(*),bnd(*),work(lwork) double precision dparm(*),tolin,doption(*) integer iparm(*) external aprod c %------------% c | Parameters | c %------------% double precision one, zero, FUDGE parameter(one = 1.0, zero = 0.0, FUDGE = 1.01) c %-----------------% c | Local variables | c %-----------------% integer k,i,ibnd,iwrk,ierr,ip,iq,nconv,lwrk,kold,dim integer ialpha,ibeta,ialpha1,ibeta1,ishift,nshft,lapinfo double precision eps,eps34,epsn2,epsn,sfmin,anorm,rnorm,tol double precision shift, relgap real t0,t1,t2,t3 integer st,cnk,wst,wcnk, tid, nt c %----------------------% c | External Subroutines | c %----------------------% external dzero,izero,dcopy,daxpy,dbdsqr,dgemm c %--------------------% c | External Functions | c %--------------------% logical lsame double precision dlamch,pdnrm2,ddot,dlapy2 external pdnrm2,ddot,lsame external dlamch,dlapy2 #ifdef _OPENMP integer omp_get_thread_num, omp_get_num_threads external omp_get_thread_num, omp_get_num_threads #endif c-------------------- Here begins executable code --------------------- c %-------------% c | Start timer | c %-------------% call second(t0) c %---------------------------------% c | Set machine dependent constants | c %---------------------------------% eps = dlamch('e') eps34 = eps**(3.0/4.0) epsn = dble(max(m,n))*eps/2.0 epsn2 = sqrt(dble(max(m,n)))*eps/2.0 sfmin = dlamch('s') c %--------------------------------% c | Guard against absurd arguments | c %--------------------------------% dim = min(dim,n+1,m+1) k = dim-p tol = min(one,max(16.0*eps,tolin)) anorm = zero c %------------------------------% c | Set pointers into work array | c %------------------------------% ibnd = 1 ialpha = ibnd + dim+1 ibeta = ialpha + dim ialpha1 = ibeta + dim ibeta1 = ialpha1 + dim ishift = ibeta1 + dim ip = ishift + dim iq = ip + (dim+1)**2 iwrk = iq + dim**2 lwrk = lwork-iwrk+1 call dzero(8*dim + 3 + 2*dim**2,work,1) c %---------------------------------------------------------------% c | Set up random starting vector if none is provided by the user | c %---------------------------------------------------------------% rnorm = pdnrm2(m,U(1,1),1) if (rnorm.eq.zero) then call dgetu0('n',m,n,0,1,U,rnorm,U,ldu,aprod, c dparm,iparm, ierr,ioption(1),anorm,work(iwrk)) endif iter = 0 info = 0 nconv = 0 kold = 0 c %------------------------------% c | Iterate until convergence... | c %------------------------------% do while (nconv.lt.neig .and. iter.lt.maxiter) c %---------------------------------------------------% c | Compute bidiagonalization A*V_{j} = U_{j+1}*B_{j} | c %---------------------------------------------------% call dlanbpro(m, n, kold, dim, aprod, U, ldu, V, ldv, c work(ialpha),dim,rnorm,doption(1),ioption(1), c work(iwrk), iwork, dparm, iparm, ierr) kold = k c %---------------------------------------------% c | Compute and analyze SVD(B) and error bounds | c %---------------------------------------------% call dcopy(dim, work(ialpha),1,work(ialpha1),1) call dcopy(dim, work(ibeta),1,work(ibeta1),1) call dzero(dim+1,work(ibnd),1) call second(t2) call dbdqr((dim.eq.min(m,n)),'N',dim,work(ialpha1), c work(ibeta1),work(ibnd+dim-1),work(ibnd+dim), c work(ip),dim+1) call dbdsqr('u',dim,0,1,0,work(ialpha1),work(ibeta1),work,1, c work(ibnd),1,work,1,work(iwrk),lapinfo) call second(t3) tbsvd = tbsvd + (t3-t2) nbsvd = nbsvd + 1 if (dim.gt.5) then anorm = work(ialpha1) else anorm = max(anorm,work(ialpha1)) endif do i=1,dim work(ibnd+i-1) = abs(rnorm*work(ibnd+i-1)) c write (*,*) 'bnd(',i,') = ',work(ibnd+i-1) enddo c %---------------------------------------------% c | Refine error bounds using the "Gap theorem" | c %---------------------------------------------% if (lsame(which,'s')) then call drefinebounds(min(m,n),dim,work(ialpha1),work(ibnd), c epsn*anorm,eps34) else call drefinebounds(min(m,n), min(dim,neig),work(ialpha1), c work(ibnd),epsn*anorm,eps34) endif c do i=1,dim c write (*,*) 'bnd(',i,') = ',work(ibnd+i-1) c enddo c %----------------------------------------------------% c | Determine the number of converged singular values | c %----------------------------------------------------% c do i=1,min(dim,neig) c write(*,*) iter,i,work(ialpha1+i-1),work(ibnd+i-1) c enddo if (lsame(which,'s')) then i = dim-neig+1 nconv = 0 do while(i.le.dim) c write(*,*) 'iter = ',iter,'sigma = ',work(ialpha1+i-1), c c 'error = ',work(ibnd+i-1) c write(*,*) iter,work(ialpha1+i-1),work(ibnd+i-1) if (work(ibnd+i-1).le.tol*work(ialpha1)) then nconv = nconv + 1 sigma(nconv) = work(ialpha1+i-1) bnd(nconv) = work(ibnd+i-1) endif i = i+1 enddo else i = 1 nconv = 0 do while(i.le.min(dim,neig)) c write(*,*) iter,work(ialpha1+i-1),work(ibnd+i-1) if (work(ibnd+i-1).le.tol*work(ialpha1+i-1)) then nconv = nconv + 1 sigma(nconv) = work(ialpha1+i-1) bnd(nconv) = work(ibnd+i-1) i = i+1 else i = k+1 endif enddo endif c %-----------------------------------------------% c | Test if an invariant subspace have been found | c %-----------------------------------------------% if (ierr.lt.0) then if (dim.lt.k) then write(*,*) 'WARNING: Invariant subspace found.', c ' Dimension = ',dim info = dim endif goto 50 endif if (nconv.lt.neig) then c %---------------------------------------------------------------------% c | Implicit restart: | c | Apply shifts mu_1, mu_2,...,mu_p to "back up" the | c | bidiagonalization (dim-k) steps to | c | | c | A V_{k}^{+} = U_{k+1}^{+} B_{k}^{+} | c | corresponding to starting vector | c | u_1^{+} = \prod_{i=1}^{p} (A A^T - mu_i^2) u_1 | c | | c | We use exact shifts mu_i for which the relative gap between mu_i | c | and the lower bound on the k'th Ritzvalue is larger than doption(4) | c %---------------------------------------------------------------------% call second(t2) call dzero(dim-k,work(ishift),1) nshft = 0 if (lsame(which,'s')) then do i=1,k relgap = (work(ialpha1+i-1)-work(ibnd+i-1) - c work(ialpha1+dim-neig-1)) if (relgap.gt.doption(4)*work(ialpha1+dim-neig-1)) c then work(ishift + nshft) = work(ialpha1+i-1) else work(ishift + nshft) = work(ialpha1) endif c write (*,*) 'shift = ',work(ishift + nshft) nshft = nshft + 1 enddo else do i=dim,k+1,-1 relgap = work(ialpha1+k-1) - c (work(ialpha1+i-1)+work(ibnd+i-1)) if (relgap.gt.doption(4)*work(ialpha1+k-1)) then work(ishift + nshft) = work(ialpha1+i-1) else work(ishift + nshft) = 0d0 endif nshft = nshft + 1 enddo endif c %--------------------------------------------------% c | Apply shifts and accumulate rotations such that | c | B_{dim}^{+} = P * B_{dim} * Q^T | c %--------------------------------------------------% call dzero((dim+1)*(dim+1),work(ip),1) call dzero(dim*dim,work(iq),1) do i=1,dim+1 work(ip+(i-1)*(dim+2)) = one enddo do i=1,dim work(iq+(i-1)*(dim+1)) = one enddo do i=dim,k+1,-1 shift = work(ishift+dim-i) call dbsvdstep('y','y',dim+1,dim,i,shift,work(ialpha), c work(ibeta),work(ip),dim+1,work(iq), dim) enddo c %---------------------------------------------------% c | Compute first k+1 left and first k right updated | c | Lanczos vectors | c | U_{dim+1}^{+} = U_{dim+1} * P(:,1:k+1) | c | V_{dim}^{+} = V_{dim} * Q(:,1:k) | c %---------------------------------------------------% #ifdef _OPENMP c$OMP PARALLEL private(tid,nt,cnk,st,wcnk,wst) tid = omp_get_thread_num() nt = omp_get_num_threads() #else tid = 0 nt = 1 #endif wcnk = lwrk/nt wst = tid*wcnk+1 cnk = m/nt st = tid*cnk+1 if (tid.eq.nt-1) then wcnk = lwrk-wst+1 cnk = m-st+1 endif call dgemm_ovwr_left('n',cnk,k+1,dim+1,1d0,U(st,1),ldu,0d0, c work(ip),dim+1,work(iwrk+wst-1),wcnk) cnk = n/nt st = tid*cnk+1 if (tid.eq.nt-1) then cnk = n-st+1 endif call dgemm_ovwr_left('n',cnk,k,dim,1d0,V(st,1),ldv,0d0, c work(iq),dim,work(iwrk+wst-1),wcnk) #ifdef _OPENMP c$OMP END PARALLEL #endif rnorm = work(ibeta+k-1) call second(t3) trestart = trestart + (t3-t2) nrestart = nrestart + 1 endif iter = iter + 1 enddo 50 if ((nconv.ge.neig .or. info.gt.0) .and. c (lsame(jobu,'y') .or. lsame(jobv,'y'))) then c %-----------------------------------------% c | Calculate singular vectors if requested % c %-----------------------------------------% call dcopy(dim, work(ialpha),1,work(ialpha1),1) call dcopy(dim, work(ibeta),1,work(ibeta1),1) lwrk = lwrk + dim**2 + (dim+1)**2 call dritzvec(which, jobu,jobv,m,n,nconv,dim,work(ialpha1), c work(ibeta1),work(ialpha1),U,ldu,V,ldv,work(ip), c lwrk,iwork) endif neig = nconv nlandim = dim call second(t1) tlansvd = t1-t0 end