define(s_type,`real(skind)') define(d_type,`real(dkind)') define(c_type,`complex(skind)') define(z_type,`complex(dkind)') define(i_type,`integer') define(TYPE,`$1_type') module blas3 implicit none private integer,parameter:: skind = kind(1.e0), dkind = kind(1.d0) ! ones s_type,parameter:: s_one = real(1,kind=skind) d_type,parameter:: d_one = real(1,kind=dkind) c_type,parameter:: c_one = cmplx(1,kind=skind) z_type,parameter:: z_one = cmplx(1,kind=dkind) ! zeros s_type,parameter:: s_zero = real(0,kind=skind) d_type,parameter:: d_zero = real(0,kind=dkind) c_type,parameter:: c_zero = cmplx(0,kind=skind) z_type,parameter:: z_zero = cmplx(0,kind=dkind) character,parameter:: blas_trans = "T" character,parameter:: blas_no_trans = "N" character,parameter:: blas_conj_trans = "H" character,parameter:: blas_lower = "L" character,parameter:: blas_upper = "U" character,parameter:: blas_unit_diag = "U" character,parameter:: blas_non_unit_diag = "N" character,parameter:: blas_left_side = "L" character,parameter:: blas_right_side = "R" public blas_trans, blas_no_trans, blas_conj_trans public blas_lower, blas_upper public blas_unit_diag, blas_non_unit_diag public blas_left_side, blas_right_side ! sizeof is a less common extension, so provide sizes manually integer,parameter:: s_size = 4 integer,parameter:: d_size = 8 integer,parameter:: c_size = 2*s_size integer,parameter:: z_size = 2*d_size ifdef(`EXTERNALLOC',`integer,external:: loc') ifdef(`UNSAFE',`! this is unsafe version',`define(SAFE) ! this is safe version') define(MAKEPUBLIC,` ifdef(``ISPUBLIC'$1`'',`! $1 already public',` public $1 define(``ISPUBLIC'$1`'') ') ') define(BIND,`interface $1 module procedure s$2_95 module procedure d$2_95 module procedure c$2_95 module procedure z$2_95 end interface MAKEPUBLIC($1)') define(BINDR,`interface $1 module procedure s$2_95 module procedure d$2_95 end interface MAKEPUBLIC($1)') define(BINDC,`interface $1 module procedure c$2_95 module procedure z$2_95 end interface MAKEPUBLIC($1)') BIND(GEMM,GEMV) BINDR(SYMM,SYMV) BINDC(HEMM,HEMV) BIND(TRMM,TRMV) BIND(TRSM,TRSV) BINDR(GEMM,GER) BINDC(GEMM,GERUC) BINDR(SYRK,SYR) BINDC(HERK,HER) BINDR(SYR2K,SYR2) BINDC(HER2K,HER2) BIND(GEMM,GEMM) BIND(SYMM,SYMM) BINDC(HEMM,HEMM) BIND(TRMM,TRMM) BIND(TRSM,TRSM) BINDR(SYRK,SYRK) BINDC(HERK,HERK) BINDR(SYR2K,SYR2K) BINDC(HER2K,HER2K) external:: sblas3_gescal external:: dblas3_gescal external:: cblas3_gescal external:: zblas3_gescal external:: sblas3_syscal external:: dblas3_syscal external:: cblas3_syscal external:: zblas3_syscal contains ifdef(`SAFE',` define(MATEXT,` subroutine $1matext(mat,ppmat,ldmat,wwmat) $1_type,target:: mat(:,:) $1_type,pointer:: ppmat,wwmat(:,:) integer,intent(out):: ldmat if (size(mat,1) > 1) then if (loc(mat(2,1)) - loc(mat(1,1)) /= $1_size) goto 1 end if ldmat = 1 if (size(mat,2) > 1) ldmat = (loc(mat(1,2)) - loc(mat(1,1))) / $1_size if (ldmat < 0) goto 1 ppmat => mat(1,1) return 1 continue allocate(wwmat(size(mat,1),size(mat,2))) ppmat => wwmat(1,1) ldmat = size(mat,1) end subroutine ') MATEXT(s) MATEXT(d) MATEXT(c) MATEXT(z) ') define(VECARG,` TYPE(XC),dimension(:),intent($2):: $1 integer:: inc$1 pushdef(`INIT',` inc$1 = 1 if (size($1) > 1) inc$1 = (loc($1(2)) - loc($1(1)))/XC`_size' popdef(`INIT') INIT ') ') ifdef(`SAFE',` define(MATASS,`associated(pp$1,$1(1,1))') define(MATARG,` TYPE(XC),dimension(:,:),intent($2),target:: $1 TYPE(XC),pointer:: pp$1,ww$1(:,:) integer:: ld$1 pushdef(`COPY',` call XC`matext'($1,pp$1,ld$1,ww$1) if (.not.MATASS($1)) ww$1 = $1 popdef(`COPY') COPY ') pushdef(`UNCOPY',` if (.not.MATASS($1)) then ifelse($2,inout,`$1 = ww$1') deallocate(ww$1) end if popdef(`UNCOPY') UNCOPY ') ') ',` define(MATARG,` TYPE(XC),dimension(:,:),intent($2),target:: $1 TYPE(XC),pointer:: pp$1 integer:: ld$1 pushdef(`COPY',` ld$1 = 1 pp$1 => $1(1,1) if (size($1,2) > 1) ld$1 = (loc($1(1,2)) - loc($1(1,1)))/XC`_size' popdef(`COPY') COPY ') ') ') define(CHARG,`character,intent(in),optional:: $1 character:: v$1 pushdef(`INIT',` v$1 = $2 if (present($1)) v$1 = $1 popdef(`INIT') INIT ') ') define(ALFARG,` TYPE(XC),intent(in),optional:: alpha TYPE(XC):: valpha pushdef(`INIT',` valpha = XC`_one' if (present(alpha)) valpha = alpha popdef(`INIT') INIT ') ') define(BETARG,` TYPE(XC),intent(in),optional:: beta TYPE(XC):: vbeta pushdef(`INIT',` vbeta = XC`_'$1 if (present(beta)) vbeta = beta popdef(`INIT') INIT ') ') define(SIZE,` integer:: $1 pushdef(`INIT',` $1 = size($2,$3) if ($1 == 0) return popdef(`INIT') INIT ') ') define(`INIT',`! end init') define(`COPY',`! end copy') define(`UNCOPY',`! end uncopy') !================================================================================ ! LEVEL 2 BLAS routines !================================================================================ !======================================== ! GEMV implementation define(GEMV,` subroutine $1GEMV_95(a,b,c,transa,alpha,beta) pushdef(`XC',$1) MATARG(a,in) VECARG(b,in) VECARG(c,inout) ALFARG() BETARG(zero) CHARG(transa,blas_no_trans) SIZE(m,a,1) SIZE(n,a,2) INIT COPY call $1GEMV(vtransa,m,n,valpha,& ppa,lda,b(1),incb,vbeta,c(1),incc) UNCOPY end subroutine popdef(`XC') ') GEMV(s) GEMV(d) GEMV(c) GEMV(z) !======================================== ! SYMV implementation define(SYMV,` subroutine $1SYMV_95(a,b,c,uplo,alpha,beta) pushdef(`XC',$1) MATARG(a,in) VECARG(b,in) VECARG(c,inout) ALFARG() BETARG(zero) CHARG(uplo,blas_upper) SIZE(n,a,1) INIT COPY call $1SYMV(vuplo,n,valpha,& ppa,lda,b(1),incb,vbeta,c(1),incc) UNCOPY end subroutine popdef(`XC') ') SYMV(s) SYMV(d) !======================================== ! HEMV implementation define(HEMV,` pushdef(`XC',$1) subroutine $1HEMV_95(a,b,c,uplo,alpha,beta) MATARG(a,in) VECARG(b,in) VECARG(c,inout) ALFARG() BETARG(zero) CHARG(uplo,blas_upper) SIZE(n,a,1) INIT COPY call $1HEMV(vuplo,n,valpha,& ppa,lda,b(1),incb,vbeta,c(1),incc) UNCOPY end subroutine popdef(`XC') ') HEMV(c) HEMV(z) !======================================== ! TRMV implementation define(TRMV,` pushdef(`XC',$1) subroutine $1TRMV_95(t,b,side,uplo,transt,diag,alpha) MATARG(t,in) VECARG(b,inout) CHARG(side,blas_left_side) CHARG(uplo,blas_upper) CHARG(transt,blas_no_trans) CHARG(diag,blas_non_unit_diag) ALFARG() SIZE(n,t,1) INIT if (valpha == $1_zero) then b = $1_zero return end if COPY call $1TRMV(vuplo,vtranst,vdiag,n,ppt,ldt,b(1),incb) call $1SCAL(n,valpha,b(1),incb) UNCOPY end subroutine popdef(`XC') ') TRMV(s) TRMV(d) TRMV(c) TRMV(z) !======================================== ! TRSV implementation define(TRSV,` pushdef(`XC',$1) subroutine $1TRSV_95(t,b,side,uplo,transt,diag,alpha) MATARG(t,in) VECARG(b,inout) CHARG(side,blas_left_side) CHARG(uplo,blas_upper) CHARG(transt,blas_no_trans) CHARG(diag,blas_non_unit_diag) ALFARG() SIZE(n,t,1) INIT if (valpha == $1_zero) then b = $1_zero return end if COPY call $1TRSV(vuplo,vtranst,vdiag,n,ppt,ldt,b(1),incb) call $1SCAL(n,valpha,b(1),incb) UNCOPY end subroutine popdef(`XC') ') TRSV(s) TRSV(d) TRSV(c) TRSV(z) !======================================== ! GER implementation define(GER,` pushdef(`XC',$1) subroutine $1GER_95(a,b,c,alpha,beta) VECARG(a,in) VECARG(b,in) MATARG(c,inout) ALFARG() BETARG(zero) SIZE(m,c,1) SIZE(n,c,2) integer:: j INIT COPY call $1gescal(m,n,ppc,ldc,vbeta) call $1GER(m,n,valpha,a(1),inca,b(1),incb,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') GER(s) GER(d) define(GERUC,` pushdef(`XC',$1) subroutine $1GERUC_95(a,b,c,transb,alpha,beta) VECARG(a,in) VECARG(b,in) MATARG(c,inout) CHARG(transb,blas_trans) ALFARG() BETARG(zero) SIZE(m,c,1) SIZE(n,c,2) integer:: j INIT COPY call $1gescal(m,n,ppc,ldc,vbeta) if (vtransb == blas_conj_trans) then call $1GERC(m,n,valpha,a(1),inca,b(1),incb,ppc,ldc) else call $1GERU(m,n,valpha,a(1),inca,b(1),incb,ppc,ldc) end if UNCOPY end subroutine popdef(`XC') ') GERUC(c) GERUC(z) !======================================== ! SYR implementation define(SYR,` pushdef(`XC',$1) subroutine $1SYR_95(a,c,uplo,alpha,beta) VECARG(a,in) MATARG(c,inout) CHARG(uplo,blas_upper) ALFARG() BETARG(zero) SIZE(n,c,1) integer:: j INIT COPY call $1syscal(n,ppc,ldc,vbeta,vuplo) call $1SYR(vuplo,n,valpha,a(1),inca,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') SYR(s) SYR(d) !======================================== ! HER implementation define(HER,` pushdef(`XC',$1) subroutine $1HER_95(a,c,uplo,alpha,beta) VECARG(a,in) MATARG(c,inout) CHARG(uplo,blas_upper) ALFARG() BETARG(zero) SIZE(n,c,1) integer:: j INIT COPY call $1syscal(n,ppc,ldc,vbeta,vuplo) call $1HER(vuplo,n,valpha,a(1),inca,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') HER(c) HER(z) !======================================== ! SYR2 implementation define(SYR2,` pushdef(`XC',$1) subroutine $1SYR2_95(a,b,c,uplo,alpha,beta) VECARG(a,in) VECARG(b,in) MATARG(c,inout) CHARG(uplo,blas_upper) ALFARG() BETARG(zero) SIZE(n,c,1) integer:: j INIT COPY call $1syscal(n,ppc,ldc,vbeta,vuplo) call $1SYR2(vuplo,n,valpha,a(1),inca,b(1),incb,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') SYR2(s) SYR2(d) !======================================== ! HER2 implementation define(HER2,` pushdef(`XC',$1) subroutine $1HER2_95(a,b,c,uplo,alpha,beta) VECARG(a,in) VECARG(b,in) MATARG(c,inout) CHARG(uplo,blas_upper) ALFARG() BETARG(zero) SIZE(n,c,1) integer:: j INIT COPY call $1syscal(n,ppc,ldc,vbeta,vuplo) call $1HER2(vuplo,n,valpha,a(1),inca,b(1),incb,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') HER2(c) HER2(z) !================================================================================ ! LEVEL 3 BLAS routines !================================================================================ !======================================== ! GEMM implementation define(GEMM,` pushdef(`XC',$1) subroutine $1GEMM_95(a,b,c,transa,transb,alpha,beta) MATARG(a,in) MATARG(b,in) MATARG(c,inout) CHARG(transa,blas_no_trans) CHARG(transb,blas_no_trans) ALFARG() BETARG(zero) SIZE(m,c,1) SIZE(n,c,2) integer:: k INIT k = merge(size(a,1),size(a,2),vtransa == blas_no_trans) if (k == 0) return COPY call $1GEMM(vtransa,vtransb,m,n,k,valpha,ppa,lda,ppb,ldb,vbeta,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') GEMM(s) GEMM(d) GEMM(c) GEMM(z) !======================================== ! SYMM implementation define(SYMM,` pushdef(`XC',$1) subroutine $1SYMM_95(a,b,c,side,uplo,alpha,beta) MATARG(a,in) MATARG(b,in) MATARG(c,inout) ALFARG() BETARG(zero) CHARG(side,blas_left_side) CHARG(uplo,blas_upper) SIZE(m,c,1) SIZE(n,c,2) INIT COPY call $1SYMM(vside,vuplo,m,n,valpha,ppa,lda,ppb,ldb,vbeta,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') SYMM(s) SYMM(d) SYMM(c) SYMM(z) !======================================== ! HEMM implementation define(HEMM,` pushdef(`XC',$1) subroutine $1HEMM_95(a,b,c,side,uplo,alpha,beta) MATARG(a,in) MATARG(b,in) MATARG(c,inout) ALFARG() BETARG(zero) CHARG(side,blas_left_side) CHARG(uplo,blas_upper) SIZE(m,c,1) SIZE(n,c,2) INIT COPY call $1HEMM(vside,vuplo,m,n,valpha,& ppa,lda,ppb,ldb,vbeta,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') HEMM(c) HEMM(z) !======================================== ! TRMM implementation define(TRMM,` pushdef(`XC',$1) subroutine $1TRMM_95(t,b,side,uplo,transt,diag,alpha) MATARG(t,in) MATARG(b,inout) ALFARG() CHARG(side,blas_left_side) CHARG(uplo,blas_upper) CHARG(transt,blas_no_trans) CHARG(diag,blas_non_unit_diag) SIZE(m,b,1) SIZE(n,b,2) INIT COPY call $1TRMM(vside,vuplo,vtranst,vdiag,m,n,valpha,ppt,ldt,ppb,ldb) UNCOPY end subroutine popdef(`XC') ') TRMM(s) TRMM(d) TRMM(c) TRMM(z) !======================================== ! TRSM implementation define(TRSM,` pushdef(`XC',$1) subroutine $1TRSM_95(t,b,side,uplo,transt,diag,alpha) MATARG(t,in) MATARG(b,inout) ALFARG() CHARG(side,blas_left_side) CHARG(uplo,blas_upper) CHARG(transt,blas_no_trans) CHARG(diag,blas_non_unit_diag) SIZE(m,b,1) SIZE(n,b,2) INIT COPY call $1TRSM(vside,vuplo,vtranst,vdiag,m,n,valpha,ppt,ldt,ppb,ldb) UNCOPY end subroutine popdef(`XC') ') TRSM(s) TRSM(d) TRSM(c) TRSM(z) !======================================== ! SYRK implementation define(SYRK,` pushdef(`XC',$1) subroutine $1SYRK_95(a,c,uplo,trans,alpha,beta) MATARG(a,in) MATARG(c,in) ALFARG() BETARG(zero) CHARG(uplo,blas_upper) CHARG(trans,blas_no_trans) SIZE(n,c,1) integer:: k INIT k = merge(size(a,2),size(a,1),vtrans == blas_no_trans) if (k == 0) return COPY call $1SYRK(vuplo,vtrans,n,k,valpha,ppa,lda,vbeta,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') SYRK(s) SYRK(d) SYRK(c) SYRK(z) !======================================== ! HERK implementation define(HERK,` pushdef(`XC',$1) subroutine $1HERK_95(a,c,uplo,trans,alpha,beta) MATARG(a,in) MATARG(c,in) ALFARG() BETARG(zero) CHARG(uplo,blas_upper) CHARG(trans,blas_no_trans) SIZE(n,c,1) integer:: k INIT k = merge(size(a,2),size(a,1),vtrans == blas_no_trans) if (k == 0) return COPY call $1HERK(vuplo,vtrans,n,k,valpha,ppa,lda,vbeta,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') HERK(c) HERK(z) !======================================== ! SYR2K implementation define(SYR2K,` pushdef(`XC',$1) subroutine $1SYR2K_95(a,b,c,uplo,trans,alpha,beta) MATARG(a,in) MATARG(b,in) MATARG(c,inout) ALFARG() BETARG(zero) CHARG(uplo,blas_upper) CHARG(trans,blas_no_trans) SIZE(n,c,1) integer:: k INIT k = merge(size(a,2),size(a,1),vtrans == blas_no_trans) if (k == 0) return COPY call $1SYR2K(vuplo,vtrans,n,k,valpha,ppa,lda,ppb,ldb,vbeta,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') SYR2K(s) SYR2K(d) SYR2K(c) SYR2K(z) !======================================== ! HER2K implementation define(HER2K,` pushdef(`XC',$1) subroutine $1HER2K_95(a,b,c,uplo,trans,alpha,beta) MATARG(a,in) MATARG(b,in) MATARG(c,inout) ALFARG() BETARG(zero) CHARG(uplo,blas_upper) CHARG(trans,blas_no_trans) SIZE(n,c,1) integer:: k INIT k = merge(size(a,2),size(a,1),vtrans == blas_no_trans) if (k == 0) return COPY call $1HER2K(vuplo,vtrans,n,k,valpha,ppa,lda,ppb,ldb,vbeta,ppc,ldc) UNCOPY end subroutine popdef(`XC') ') HER2K(c) HER2K(z) end module blas3 ! matrix scaling routines written in F77 BLAS style, for extending the functionality ! of level 2 rank 1/2 update procedures by an optional beta argument. ! These have to be external, otherwise the interface checking would not allow us ! to call them in the way we need. define(GESCAL,` subroutine $1blas3_gescal(m,n,a,lda,beta) integer,parameter:: skind = kind(1.e0), dkind = kind(1.d0) ! ones s_type,parameter:: s_one = real(1,kind=skind) d_type,parameter:: d_one = real(1,kind=dkind) c_type,parameter:: c_one = cmplx(1,kind=skind) z_type,parameter:: z_one = cmplx(1,kind=dkind) ! zeros s_type,parameter:: s_zero = real(0,kind=skind) d_type,parameter:: d_zero = real(0,kind=dkind) c_type,parameter:: c_zero = cmplx(0,kind=skind) z_type,parameter:: z_zero = cmplx(0,kind=dkind) integer,intent(in):: m,n,lda TYPE($1):: a(lda,*),beta integer:: j if (beta == $1_zero) then a(1:m,1:n) = $1_zero else if (beta /= $1_one) then do j=1,n call $1scal(m,beta,a(1,j),1) end do end if end subroutine ') GESCAL(s) GESCAL(d) GESCAL(c) GESCAL(z) define(SYSCAL,` subroutine $1blas3_syscal(m,n,a,lda,beta,uplo) integer,parameter:: skind = kind(1.e0), dkind = kind(1.d0) ! ones s_type,parameter:: s_one = real(1,kind=skind) d_type,parameter:: d_one = real(1,kind=dkind) c_type,parameter:: c_one = cmplx(1,kind=skind) z_type,parameter:: z_one = cmplx(1,kind=dkind) ! zeros s_type,parameter:: s_zero = real(0,kind=skind) d_type,parameter:: d_zero = real(0,kind=dkind) c_type,parameter:: c_zero = cmplx(0,kind=skind) z_type,parameter:: z_zero = cmplx(0,kind=dkind) integer,intent(in):: m,n,lda TYPE($1):: a(lda,*),beta character,intent(in):: uplo integer:: j if (uplo == "U") then if (beta == $1_zero) then do j=1,n a(1:j,j) = $1_zero end do else if (beta /= $1_one) then do j=1,n call $1scal(j,beta,a(1,j),1) end do end if else if (beta == $1_zero) then do j=1,n a(j:m,j) = $1_zero end do else if (beta /= $1_one) then do j=1,n call $1scal(m+j-1,beta,a(j,j),1) end do end if end if end subroutine ') SYSCAL(s) SYSCAL(d) SYSCAL(c) SYSCAL(z)