#include "cppdefs.h"

      MODULE ad_bcov_3d_mod

#if (defined S4DVAR || defined IS4DVAR || defined W4DVAR) && \
     defined SOLVE3D
!
!=================================================== Andrew M. Moore ===
!  Copyright (c) 2005 ROMS/TOMS Adjoint Group                          !
!================================================== Hernan G. Arango ===
!                                                                      !
!  This routines compute the background error covariance for data      !
!  assimilation via a space convolution of the diffusion equation      !
!  for 3D state variables. The diffusion equation is solved using      !
!  an explicit (inefficient) algorithm.                                !
!                                                                      !
!  For Gaussian (bell-shaped) covariances,  the space convolution      !
!  of the diffusion operator is an efficient way  to estimate the      !
!  finite domain error covariances.                                    !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Istr       Starting tile index in the I-direction.               !
!     Iend       Ending   tile index in the I-direction.               !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension lower bound.                              !
!     UBi        I-dimension upper bound.                              !
!     LBj        J-dimension lower bound.                              !
!     UBj        J-dimension upper bound.                              !
!     LBk        K-dimension lower bound.                              !
!     UBk        K-dimension upper bound.                              !
!     Nghost     Number of ghost points.                               !
!     NHsteps    Number of horizontal diffusion integration steps.     !
!     NVsteps    Number of vertical   diffusion integration steps.     !
!     DTsizeH    Horizontal diffusion pseudo time-step size.           !
!     DTsizeV    Vertical   diffusion pseudo time-step size.           !
!     Kh         Horizontal diffusion coefficients.                    !
!     Kv         Vertical   diffusion coefficients.                    !
!     ad_A       3D adjoint state variable to diffuse.                 !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     ad_A       Diffused 3D adjoint state variable.                   !      
!                                                                      !
!  Routines:                                                           !
!                                                                      !
!    ad_bcov_r3d_tile  Background error covariance at RHO-points       !
!    ad_bcov_u3d_tile  Background error covariance at U-points         !
!    ad_bcov_v3d_tile  Background error covariance at V-points         !
!                                                                      !
!=======================================================================
!
      implicit none
 
      PUBLIC

      CONTAINS
!
!***********************************************************************
      SUBROUTINE ad_bcov_r3d_tile (ng, model, Istr, Iend, Jstr, Jend,   &
     &                             LBi, UBi, LBj, UBj, LBk, UBk,        &
     &                             Nghost, NHsteps, NVsteps,            &
     &                             DTsizeH, DTsizeV,                    &
     &                             Kh, Kv,                              &
     &                             pm, pn, pmon_u, pnom_v,              &
# ifdef MASKING
     &                             umask, vmask,                        &
# endif
     &                             Hz, z_r,                             &
     &                             ad_A)
!***********************************************************************
!
      USE mod_param
!
      USE ad_bc_3d_mod, ONLY: ad_bc_r3d_tile
# ifdef DISTRIBUTE
      USE mp_exchange_mod, ONLY : ad_mp_exchange3d
# endif
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk
      integer, intent(in) :: Nghost, NHsteps, NVsteps

      real(r8), intent(in) :: DTsizeH, DTsizeV
!
# ifdef ASSUMED_SHAPE
      real(r8), intent(in) :: pm(LBi:,LBj:)
      real(r8), intent(in) :: pn(LBi:,LBj:)
      real(r8), intent(in) :: pmon_u(LBi:,LBj:)
      real(r8), intent(in) :: pnom_v(LBi:,LBj:)
#  ifdef MASKING
      real(r8), intent(in) :: umask(LBi:,LBj:)
      real(r8), intent(in) :: vmask(LBi:,LBj:)
#  endif
      real(r8), intent(in) :: Hz(LBi:,LBj:,:)
      real(r8), intent(in) :: z_r(LBi:,LBj:,:)
      real(r8), intent(in) :: Kh(LBi:,LBj:)
      real(r8), intent(in) :: Kv(LBi:,LBj:,0:)

      real(r8), intent(inout) :: ad_A(LBi:,LBj:,LBk:)
# else
      real(r8), intent(in) :: pm(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pn(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pmon_u(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pnom_v(LBi:UBi,LBj:UBj)
#  ifdef MASKING
      real(r8), intent(in) :: umask(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: vmask(LBi:UBi,LBj:UBj)
#  endif
      real(r8), intent(in) :: Hz(LBi:UBi,LBj:UBj,N(ng))
      real(r8), intent(in) :: z_r(LBi:UBi,LBj:UBj,N(ng))
      real(r8), intent(in) :: Kh(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: Kv(LBi:UBi,LBj:UBj,0:UBk)

      real(r8), intent(inout) :: ad_A(LBi:UBi,LBj:UBj,LBk:UBk)
# endif
!
!  Local variable declarations.
!
# ifdef DISTRIBUTE
#  ifdef EW_PERIODIC
      logical :: EWperiodic=.TRUE.
#  else
      logical :: EWperiodic=.FALSE.
#  endif
#  ifdef NS_PERIODIC
      logical :: NSperiodic=.TRUE.
#  else
      logical :: NSperiodic=.FALSE.
#  endif
# endif
      integer :: IstrR, IendR, JstrR, JendR, IstrU, JstrV
      integer :: ILB, IUB, JLB, JUB
      integer :: Nnew, Nold, Nsav, i, j, k, step

      real(r8) :: adfac

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: Hfac

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng)) :: Vfac
      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng)) :: oHz

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng),2) :: ad_Awrk

      real(r8), dimension(PRIVATE_1D_SCRATCH_ARRAY,0:N(ng)) :: ad_FS

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: ad_FE
      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: ad_FX

# include "set_bounds.h"
!
      ILB=LBOUND(ad_Awrk,DIM=1)
      IUB=UBOUND(ad_Awrk,DIM=1)
      JLB=LBOUND(ad_Awrk,DIM=2)
      JUB=UBOUND(ad_Awrk,DIM=2)
!
!-----------------------------------------------------------------------
!  Initialize adjoint private variables.
!-----------------------------------------------------------------------
!
      ad_Awrk(ILB:IUB,JLB:JUB,1:N(ng),1:2)=0.0_r8

      ad_FS(ILB:IUB,0:N(ng))=0.0_r8

      ad_FE(ILB:IUB,JLB:JUB)=0.0_r8
      ad_FX(ILB:IUB,JLB:JUB)=0.0_r8
!
!-----------------------------------------------------------------------
!  Adjoint space convolution of the diffusion equation for a 2D state
!  variable at RHO-points.
!-----------------------------------------------------------------------
!
!  Compute metrics factors.  Notice that "z_r" and "Hz" are assumed to
!  be time invariant in the vertical convolution.  Scratch array are
!  used for efficiency.
!
      DO j=Jstr-1,Jend+1
        DO i=Istr-1,Iend+1
          Hfac(i,j)=DTsizeH*pm(i,j)*pn(i,j)
          DO k=1,N(ng)-1
            Vfac(i,j,k)=DTsizeV*Kv(i,j,k)/(z_r(i,j,k+1)-z_r(i,j,k))
          END DO
          DO k=1,N(ng)
            oHz(i,j,k)=1.0_r8/Hz(i,j,k)
          END DO
        END DO
      END DO
!
!  Load adjoint solution.
!
      Nold=1
      Nnew=2
      DO k=1,N(ng)
        DO j=JstrR,JendR
          DO i=IstrR,IendR
!>          tl_A(i,j,k)=tl_Awrk(i,j,k,Nold)
!>
            ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+ad_A(i,j,k)
            ad_A(i,j,k)=0.0_r8
          END DO
        END DO
      END DO
!
!-----------------------------------------------------------------------
!  Integrate adjoint horizontal diffusion terms.
!-----------------------------------------------------------------------
!
      DO step=1,NHsteps
!
!  Update integration indices.
!
        Nsav=Nnew
        Nnew=Nold
        Nold=Nsav
!
!  Apply adjoint boundary conditions. If applicable, exchange boundary
!  data.
!
# ifdef DISTRIBUTE
!>      CALL mp_exchange3d (ng, model, 1, Istr, Iend, Jstr, Jend,       &
!>   &                      ILB, IUB, JLB, JUB, LBk, UBk,               &
!>   &                      Nghost, EWperiodic, NSperiodic,             &
!>   &                      tl_Awrk(:,:,:,Nnew))
!>
        CALL ad_mp_exchange3d (ng, model, 1, Istr, Iend, Jstr, Jend,    &
     &                         ILB, IUB, JLB, JUB, LBk, UBk,            &
     &                         Nghost, EWperiodic, NSperiodic,          &
     &                         ad_Awrk(:,:,:,Nnew))
# endif
!>      CALL bc_r3d_tile (ng, Istr, Iend, Jstr, Jend,                   &
!>   &                    ILB, IUB, JLB, JUB, LBk, UBk,                 &
!>   &                    tl_Awrk(:,:,:,Nnew))
!>
        CALL ad_bc_r3d_tile (ng, Istr, Iend, Jstr, Jend,                &
     &                       ILB, IUB, JLB, JUB, LBk, UBk,              &
     &                       ad_Awrk(:,:,:,Nnew))
!
!  Time-step adjoint horizontal harmonic diffusion term.
!
        DO k=1,N(ng)
          DO j=Jstr,Jend
            DO i=Istr,Iend
!>            tl_Awrk(i,j,k,Nnew)=tl_Awrk(i,j,k,Nold)+                  &
!>   &                            Hfac(i,j)*                            &
!>   &                            (tl_FX(i+1,j)-tl_FX(i,j)+             &
!>   &                             tl_FE(i,j+1)-tl_FE(i,j))
!>
              adfac=Hfac(i,j)*ad_Awrk(i,j,k,Nnew)
              ad_FE(i,j  )=ad_FE(i,j  )-adfac
              ad_FE(i,j+1)=ad_FE(i,j+1)+adfac
              ad_FX(i  ,j)=ad_FX(i  ,j)-adfac
              ad_FX(i+1,j)=ad_FX(i+1,j)+adfac
              ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+                  &
     &                            ad_Awrk(i,j,k,Nnew)
              ad_Awrk(i,j,k,Nnew)=0.0_r8
            END DO
          END DO
!
!  Compute XI- and ETA-components of the adjoint diffusive flux.
!
          DO j=Jstr,Jend+1
            DO i=Istr,Iend
# ifdef MASKING
!>            tl_FE(i,j)=tl_FE(i,j)*vmask(i,j)
!>
              ad_FE(i,j)=ad_FE(i,j)*vmask(i,j)
# endif
!>            tl_FE(i,j)=pnom_v(i,j)*0.5_r8*(Kh(i,j-1)+Kh(i,j))*        &
!>   &                   (tl_Awrk(i,j,k,Nold)-tl_Awrk(i,j-1,k,Nold))
!>
              adfac=pnom_v(i,j)*0.5_r8*(Kh(i,j-1)+Kh(i,j))*ad_FE(i,j)
              ad_Awrk(i,j-1,k,Nold)=ad_Awrk(i,j-1,k,Nold)-adfac
              ad_Awrk(i,j  ,k,Nold)=ad_Awrk(i,j  ,k,Nold)+adfac
              ad_FE(i,j)=0.0_r8
            END DO
          END DO
          DO j=Jstr,Jend
            DO i=Istr,Iend+1
# ifdef MASKING
!>            tl_FX(i,j)=tl_FX(i,j)*umask(i,j)
!>
              ad_FX(i,j)=ad_FX(i,j)*umask(i,j)
# endif
!>            tl_FX(i,j)=pmon_u(i,j)*0.5_r8*(Kh(i-1,j)+Kh(i,j))*        &
!>   &                   (tl_Awrk(i,j,k,Nold)-tl_Awrk(i-1,j,k,Nold))
!>
              adfac=pmon_u(i,j)*0.5_r8*(Kh(i-1,j)+Kh(i,j))*ad_FX(i,j)
              ad_Awrk(i-1,j,k,Nold)=ad_Awrk(i-1,j,k,Nold)-adfac
              ad_Awrk(i  ,j,k,Nold)=ad_Awrk(i  ,j,k,Nold)+adfac
              ad_FX(i,j)=0.0_r8
            END DO
          END DO
        END DO
      END DO
!
!-----------------------------------------------------------------------
!  Integrate adjoint vertical diffusion term.
!-----------------------------------------------------------------------
!
      DO step=1,NVsteps
!
!  Update integration indices.
!
        Nsav=Nnew
        Nnew=Nold
        Nold=Nsav

        DO j=Jstr-1,Jend+1
!
!  Time-step adjoint vertical diffusive term. Notice that "oHz" is
!  assumed to be time invariant.
!
          DO k=1,N(ng)
            DO i=Istr-1,Iend+1
!>            tl_Awrk(i,j,k,Nnew)=tl_Awrk(i,j,k,Nold)+                  &
!>   &                            oHz(i,j,k)*(tl_FS(i,k  )-             &
!>   &                                        tl_FS(i,k-1))
!>
              adfac=oHz(i,j,k)*ad_Awrk(i,j,k,Nnew)
              ad_FS(i,k-1)=ad_FS(i,k-1)-adfac
              ad_FS(i,k  )=ad_FS(i,k  )+adfac
              ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+                  &
     &                            ad_Awrk(i,j,k,Nnew)
              ad_Awrk(i,j,k,Nnew)=0.0_r8
            END DO
          END DO
!
!  Compute adjoint vertical diffusive flux.  Notice that "Vfac" is
!  assumed to be time invariant.
!
          DO i=Istr,Iend
!>          tl_FS(i,N(ng))=0.0_r8
!>
            ad_FS(i,N(ng))=0.0_r8
!>          tl_FS(i,0)=0.0_r8
!>
            ad_FS(i,0)=0.0_r8
          END DO
          DO k=1,N(ng)-1
            DO i=Istr-1,Iend+1
!>            tl_FS(i,k)=Vfac(i,j,k)*(tl_Awrk(i,j,k+1,Nold)-            &
!>   &                                tl_Awrk(i,j,k  ,Nold))
!>
              adfac=Vfac(i,j,k)*ad_FS(i,k)
              ad_Awrk(i,j,k  ,Nold)=ad_Awrk(i,j,k  ,Nold)-adfac
              ad_Awrk(i,j,k+1,Nold)=ad_Awrk(i,j,k+1,Nold)+adfac
              ad_FS(i,k)=0.0_r8
            END DO
          END DO
        END DO
      END DO
!
!  Set adjoint initial conditions.
!
      DO k=1,N(ng)
        DO j=Jstr-1,Jend+1
          DO i=Istr-1,Iend+1
!>          tl_Awrk(i,j,k,Nold)=tl_A(i,j,k)
!>
            ad_A(i,j,k)=ad_A(i,j,k)+ad_Awrk(i,j,k,Nold)
            ad_Awrk(i,j,k,Nold)=0.0_r8
          END DO
        END DO
      END DO

      RETURN
      END SUBROUTINE ad_bcov_r3d_tile
!
!***********************************************************************
      SUBROUTINE ad_bcov_u3d_tile (ng, model, Istr, Iend, Jstr, Jend,   &
     &                             LBi, UBi, LBj, UBj, LBk, UBk,        &
     &                             Nghost, NHsteps, NVsteps,            &
     &                             DTsizeH, DTsizeV,                    &
     &                             Kh, Kv,                              &
     &                             pm, pn, pmon_r, pnom_p,              &
# ifdef MASKING
     &                             pmask,                               &
# endif
     &                             Hz, z_r,                             &
     &                             ad_A)
!***********************************************************************
!
      USE mod_param
!
      USE ad_bc_3d_mod, ONLY: ad_bc_u3d_tile
# ifdef DISTRIBUTE
      USE mp_exchange_mod, ONLY : ad_mp_exchange3d
# endif
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk
      integer, intent(in) :: Nghost, NHsteps, NVsteps

      real(r8), intent(in) :: DTsizeH, DTsizeV
!
# ifdef ASSUMED_SHAPE
      real(r8), intent(in) :: pm(LBi:,LBj:)
      real(r8), intent(in) :: pn(LBi:,LBj:)
      real(r8), intent(in) :: pmon_r(LBi:,LBj:)
      real(r8), intent(in) :: pnom_p(LBi:,LBj:)
#  ifdef MASKING
      real(r8), intent(in) :: pmask(LBi:,LBj:)
#  endif
      real(r8), intent(in) :: Hz(LBi:,LBj:,:)
      real(r8), intent(in) :: z_r(LBi:,LBj:,:)
      real(r8), intent(in) :: Kh(LBi:,LBj:)
      real(r8), intent(in) :: Kv(LBi:,LBj:,0:)

      real(r8), intent(inout) :: ad_A(LBi:,LBj:,LBk:)
# else
      real(r8), intent(in) :: pm(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pn(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pmon_r(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pnom_p(LBi:UBi,LBj:UBj)
#  ifdef MASKING
      real(r8), intent(in) :: pmask(LBi:UBi,LBj:UBj)
#  endif
      real(r8), intent(in) :: Hz(LBi:UBi,LBj:UBj,N(ng))
      real(r8), intent(in) :: z_r(LBi:UBi,LBj:UBj,N(ng))
      real(r8), intent(in) :: Kh(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: Kv(LBi:UBi,LBj:UBj,0:UBk)

      real(r8), intent(inout) :: ad_A(LBi:UBi,LBj:UBj,LBk:UBk)
# endif
!
!  Local variable declarations.
!
# ifdef DISTRIBUTE
#  ifdef EW_PERIODIC
      logical :: EWperiodic=.TRUE.
#  else
      logical :: EWperiodic=.FALSE.
#  endif
#  ifdef NS_PERIODIC
      logical :: NSperiodic=.TRUE.
#  else
      logical :: NSperiodic=.FALSE.
#  endif
# endif
      integer :: IstrR, IendR, JstrR, JendR, IstrU, JstrV
      integer :: ILB, IUB, JLB, JUB
      integer :: Nnew, Nold, Nsav, i, j, k, step

      real(r8) :: adfac, cff

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: Hfac

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng)) :: Vfac
      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng)) :: oHz

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng),2) :: ad_Awrk

      real(r8), dimension(PRIVATE_1D_SCRATCH_ARRAY,0:N(ng)) :: ad_FS

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: ad_FE
      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: ad_FX

# include "set_bounds.h"
!
      ILB=LBOUND(ad_Awrk,DIM=1)
      IUB=UBOUND(ad_Awrk,DIM=1)
      JLB=LBOUND(ad_Awrk,DIM=2)
      JUB=UBOUND(ad_Awrk,DIM=2)
!
!-----------------------------------------------------------------------
!  Initialize adjoint private variables.
!-----------------------------------------------------------------------
!
      ad_Awrk(ILB:IUB,JLB:JUB,1:N(ng),1:2)=0.0_r8

      ad_FS(ILB:IUB,0:N(ng))=0.0_r8

      ad_FE(ILB:IUB,JLB:JUB)=0.0_r8
      ad_FX(ILB:IUB,JLB:JUB)=0.0_r8
!
!-----------------------------------------------------------------------
!  Adjoint space convolution of the diffusion equation for a 3D state
!  variable at U-points.
!-----------------------------------------------------------------------
!
!  Compute metrics factors.  Notice that "z_r" and "Hz" are assumed to
!  be time invariant in the vertical convolution.  Scratch array are
!  used for efficiency.
!
      cff=DTsizeH*0.25_r8
      DO j=Jstr-1,Jend+1
        DO i=IstrU-1,Iend+1
          Hfac(i,j)=cff*(pm(i-1,j)+pm(i,j))*(pn(i-1,j)+pn(i,j))
          DO k=1,N(ng)-1
            Vfac(i,j,k)=DTsizeV*(Kv(i-1,j,k)+Kv(i,j,k))/                &
     &                  (z_r(i-1,j,k+1)+z_r(i,j,k+1)-                   &
     &                   z_r(i-1,j,k  )-z_r(i,j,k  ))
          END DO
          DO k=1,N(ng)
            oHz(i,j,k)=2.0_r8/(Hz(i-1,j,k)+Hz(i,j,k))
          END DO
        END DO
      END DO
!
!  Load adjoint solution.
!
      Nold=1
      Nnew=2
      DO k=1,N(ng)
        DO j=JstrR,JendR
          DO i=IstrU-1,IendR
!>          tl_A(i,j,k)=tl_Awrk(i,j,k,Nold)
!>
            ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+ad_A(i,j,k)
            ad_A(i,j,k)=0.0_r8
          END DO
        END DO
      END DO
!
!-----------------------------------------------------------------------
!  Integrate adjoint horizontal diffusion terms.
!-----------------------------------------------------------------------
!
      DO step=1,NHsteps
!
!  Update integration indices.
!
        Nsav=Nnew
        Nnew=Nold
        Nold=Nsav
!
!  Apply adjoint boundary conditions. If applicable, exchange boundary
!  data.
!
# ifdef DISTRIBUTE
!>      CALL mp_exchange3d (ng, model, 1, Istr, Iend, Jstr, Jend,       &
!>   &                      ILB, IUB, JLB, JUB, LBk, UBk,               &
!>   &                      Nghost, EWperiodic, NSperiodic,             &
!>   &                      tl_Awrk(:,:,:,Nnew))
!>
        CALL ad_mp_exchange3d (ng, model, 1, Istr, Iend, Jstr, Jend,    &
     &                         ILB, IUB, JLB, JUB, LBk, UBk,            &
     &                         Nghost, EWperiodic, NSperiodic,          &
     &                         ad_Awrk(:,:,:,Nnew))
# endif
!>      CALL bc_u3d_tile (ng, Istr, Iend, Jstr, Jend,                   &
!>   &                    ILB, IUB, JLB, JUB, LBk, UBk,                 &
!>   &                    tl_Awrk(:,:,:,Nnew))
!>
        CALL ad_bc_u3d_tile (ng, Istr, Iend, Jstr, Jend,                &
     &                       ILB, IUB, JLB, JUB, LBk, UBk,              &
     &                       ad_Awrk(:,:,:,Nnew))
!
!  Time-step adjoint horizontal diffusion terms.
!
        DO k=1,N(ng)
          DO j=Jstr,Jend
            DO i=IstrU,Iend
!>            tl_Awrk(i,j,k,Nnew)=tl_Awrk(i,j,k,Nold)+                  &
!>   &                            Hfac(i,j)*                            &
!>   &                            (tl_FX(i,j)-tl_FX(i-1,j)+             &
!>   &                             tl_FE(i,j+1)-tl_FE(i,j))
!>
              adfac=Hfac(i,j)*ad_Awrk(i,j,k,Nnew)
              ad_FE(i,j  )=ad_FE(i,j  )-adfac
              ad_FE(i,j+1)=ad_FE(i,j+1)+adfac
              ad_FX(i-1,j)=ad_FX(i-1,j)-adfac
              ad_FX(i  ,j)=ad_FX(i  ,j)+adfac
              ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+                  &
     &                            ad_Awrk(i,j,k,Nnew)
              ad_Awrk(i,j,k,Nnew)=0.0_r8
            END DO
          END DO
!
!  Compute XI- and ETA-components of the adjoint diffusive flux.
!
          DO j=Jstr,Jend+1
            DO i=IstrU,Iend
# ifdef MASKING
!>            tl_FE(i,j)=tl_FE(i,j)*pmask(i,j)
!>
              ad_FE(i,j)=ad_FE(i,j)*pmask(i,j)
# endif
!>            tl_FE(i,j)=pnom_p(i,j)*0.25_r8*(Kh(i-1,j  )+Kh(i,j  )+    &
!>   &                                        Kh(i-1,j-1)+Kh(i,j-1))*   &
!>   &                   (tl_Awrk(i,j,k,Nold)-tl_Awrk(i,j-1,k,Nold))
!>
              adfac=pnom_p(i,j)*0.25_r8*(Kh(i-1,j  )+Kh(i,j  )+         &
     &                                   Kh(i-1,j-1)+Kh(i,j-1))*        &
     &              ad_FE(i,j)
              ad_Awrk(i,j-1,k,Nold)=ad_Awrk(i,j-1,k,Nold)-adfac
              ad_Awrk(i,j  ,k,Nold)=ad_Awrk(i,j  ,k,Nold)+adfac
              ad_FE(i,j)=0.0_r8
            END DO
          END DO
          DO j=Jstr,Jend
            DO i=IstrU-1,Iend
!>            tl_FX(i,j)=pmon_r(i,j)*                                   &
!>   &                   (tl_Awrk(i+1,j,k,Nold)-tl_Awrk(i,j,k,Nold))
!>
              adfac=pmon_r(i,j)*Kh(i,j)*ad_FX(i,j)
              ad_Awrk(i  ,j,k,Nold)=ad_Awrk(i  ,j,k,Nold)-adfac
              ad_Awrk(i+1,j,k,Nold)=ad_Awrk(i+1,j,k,Nold)+adfac
              ad_FX(i,j)=0.0_r8
            END DO
          END DO
        END DO
      END DO
!
!-----------------------------------------------------------------------
!  Integerate adjoint vertical diffusion term.
!-----------------------------------------------------------------------
!
      DO step=1,NVsteps
!
!  Update integration indices.
!
        Nsav=Nnew
        Nnew=Nold
        Nold=Nsav

        DO j=Jstr-1,Jend+1
!
!  Time-step adjoint vertical diffusive term. Notice that "oHz" is
!  assumed to be time invariant.
!
          DO k=1,N(ng)
            DO i=IstrU-1,Iend+1
!>            tl_Awrk(i,j,k,Nnew)=tl_Awrk(i,j,k,Nold)+                  &
!>   &                            oHz(i,j,k)*(tl_FS(i,k  )-             &
!>   &                                        tl_FS(i,k-1))
!>
              adfac=oHz(i,j,k)*ad_Awrk(i,j,k,Nnew)
              ad_FS(i,k-1)=ad_FS(i,k-1)-adfac
              ad_FS(i,k  )=ad_FS(i,k  )+adfac
              ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+                  &
     &                            ad_Awrk(i,j,k,Nnew)
              ad_Awrk(i,j,k,Nnew)=0.0_r8
            END DO
          END DO
!
!  Compute adjoint vertical diffusive flux.  Notice that "Vfac" is
!  assumed to be time invariant.
!
          DO i=IstrU-1,Iend+1
!>          tl_FS(i,N(ng))=0.0_r8
!>
            ad_FS(i,N(ng))=0.0_r8
!>          tl_FS(i,0)=0.0_r8
!>
            ad_FS(i,0)=0.0_r8
          END DO
          DO k=1,N(ng)-1
            DO i=IstrU-1,Iend+1
!>            tl_FS(i,k)=Vfac(i,j,k)*(tl_Awrk(i,j,k+1,Nold)-            &
!>   &                                tl_Awrk(i,j,k  ,Nold))
!>
              adfac=Vfac(i,j,k)*ad_FS(i,k)
              ad_Awrk(i,j,k  ,Nold)=ad_Awrk(i,j,k  ,Nold)-adfac
              ad_Awrk(i,j,k+1,Nold)=ad_Awrk(i,j,k+1,Nold)+adfac
              ad_FS(i,k)=0.0_r8
            END DO
          END DO
        END DO
      END DO
!
!  Set adjoint initial conditions.
!
      DO k=1,N(ng)
        DO j=Jstr-1,Jend+1
          DO i=IstrU-1,Iend+1
!>          tl_Awrk(i,j,k,Nold)=tl_A(i,j,k)
!>
            ad_A(i,j,k)=ad_A(i,j,k)+ad_Awrk(i,j,k,Nold)
            ad_Awrk(i,j,k,Nold)=0.0_r8
          END DO
        END DO
      END DO

      RETURN
      END SUBROUTINE ad_bcov_u3d_tile
!
!***********************************************************************
      SUBROUTINE ad_bcov_v3d_tile (ng, model, Istr, Iend, Jstr, Jend,   &
     &                             LBi, UBi, LBj, UBj, LBk, UBk,        &
     &                             Nghost, NHsteps, NVsteps,            &
     &                             DTsizeH, DTsizeV,                    &
     &                             Kh, Kv,                              &
     &                             pm, pn, pmon_p, pnom_r,              &
# ifdef MASKING
     &                             pmask,                               &
# endif
     &                             Hz, z_r,                             &
     &                             ad_A)
!***********************************************************************
!
      USE mod_param
!
      USE ad_bc_3d_mod, ONLY: ad_bc_v3d_tile
# ifdef DISTRIBUTE
      USE mp_exchange_mod, ONLY : ad_mp_exchange3d
# endif
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk
      integer, intent(in) :: Nghost, NHsteps, NVsteps

      real(r8), intent(in) :: DTsizeH, DTsizeV
!
# ifdef ASSUMED_SHAPE
      real(r8), intent(in) :: pm(LBi:,LBj:)
      real(r8), intent(in) :: pn(LBi:,LBj:)
      real(r8), intent(in) :: pmon_p(LBi:,LBj:)
      real(r8), intent(in) :: pnom_r(LBi:,LBj:)
#  ifdef MASKING
      real(r8), intent(in) :: pmask(LBi:,LBj:)
#  endif
      real(r8), intent(in) :: Hz(LBi:,LBj:,:)
      real(r8), intent(in) :: z_r(LBi:,LBj:,:)
      real(r8), intent(in) :: Kh(LBi:,LBj:)
      real(r8), intent(in) :: Kv(LBi:,LBj:,0:)

      real(r8), intent(inout) :: ad_A(LBi:,LBj:,LBk:)
# else
      real(r8), intent(in) :: pm(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pn(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pmon_p(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: pnom_r(LBi:UBi,LBj:UBj)
#  ifdef MASKING
      real(r8), intent(in) :: pmask(LBi:UBi,LBj:UBj)
#  endif
      real(r8), intent(in) :: Hz(LBi:UBi,LBj:UBj,N(ng))
      real(r8), intent(in) :: z_r(LBi:UBi,LBj:UBj,N(ng))
      real(r8), intent(in) :: Kh(LBi:UBi,LBj:UBj)
      real(r8), intent(in) :: Kv(LBi:UBi,LBj:UBj,0:UBk)

      real(r8), intent(inout) :: ad_A(LBi:UBi,LBj:UBj,LBk:UBk)
# endif
!
!  Local variable declarations.
!
# ifdef DISTRIBUTE
#  ifdef EW_PERIODIC
      logical :: EWperiodic=.TRUE.
#  else
      logical :: EWperiodic=.FALSE.
#  endif
#  ifdef NS_PERIODIC
      logical :: NSperiodic=.TRUE.
#  else
      logical :: NSperiodic=.FALSE.
#  endif
# endif
      integer :: IstrR, IendR, JstrR, JendR, IstrU, JstrV
      integer :: ILB, IUB, JLB, JUB
      integer :: Nnew, Nold, Nsav, i, j, k, step

      real(r8) :: adfac, cff

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: Hfac

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng)) :: Vfac
      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng)) :: oHz

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY,N(ng),2) :: ad_Awrk

      real(r8), dimension(PRIVATE_1D_SCRATCH_ARRAY,0:N(ng)) :: ad_FS

      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: ad_FE
      real(r8), dimension(PRIVATE_2D_SCRATCH_ARRAY) :: ad_FX

# include "set_bounds.h"
!
      ILB=LBOUND(ad_Awrk,DIM=1)
      IUB=UBOUND(ad_Awrk,DIM=1)
      JLB=LBOUND(ad_Awrk,DIM=2)
      JUB=UBOUND(ad_Awrk,DIM=2)
!
!-----------------------------------------------------------------------
!  Initialize adjoint private variables.
!-----------------------------------------------------------------------
!
      ad_Awrk(ILB:IUB,JLB:JUB,1:N(ng),1:2)=0.0_r8

      ad_FS(ILB:IUB,0:N(ng))=0.0_r8

      ad_FE(ILB:IUB,JLB:JUB)=0.0_r8
      ad_FX(ILB:IUB,JLB:JUB)=0.0_r8
!
!-----------------------------------------------------------------------
!  Adjoint space convolution of the diffusion equation for a 3D state
!  variable at V-points
!-----------------------------------------------------------------------
!
!  Compute metrics factors.  Notice that "z_r" and "Hz" are assumed to
!  be time invariant in the vertical convolution.  Scratch array are
!  used for efficiency.
!
      cff=DTsizeH*0.25_r8
      DO j=JstrV-1,Jend+1
        DO i=Istr-1,Iend+1
          Hfac(i,j)=cff*(pm(i,j-1)+pm(i,j))*(pn(i,j-1)+pn(i,j))
          DO k=1,N(ng)-1
            Vfac(i,j,k)=DTsizeV*(Kv(i,j-1,k)+Kv(i,j,k))/                &
     &                  (z_r(i,j-1,k+1)+z_r(i,j,k+1)-                   &
     &                   z_r(i,j-1,k  )-z_r(i,j,k  ))
          END DO
          DO k=1,N(ng)
            oHz(i,j,k)=2.0_r8/(Hz(i,j-1,k)+Hz(i,j,k))
          END DO
        END DO
      END DO
!
!  Load adjoint solution.
!
      Nold=1
      Nnew=2
      DO k=1,N(ng)
        DO j=JstrV-1,JendR
          DO i=IstrR,IendR
!>          tl_A(i,j,k)=tl_Awrk(i,j,k,Nold)
!>
            ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+ad_A(i,j,k)
            ad_A(i,j,k)=0.0_r8
          END DO
        END DO
      END DO
!
!-----------------------------------------------------------------------
!  Integrate adjoint horizontal diffusion terms.
!-----------------------------------------------------------------------
!
      DO step=1,NHsteps
!
!  Update integration indices.
!
        Nsav=Nnew
        Nnew=Nold
        Nold=Nsav
!
!  Apply adjoint boundary conditions. If applicable, exchange boundary
!  data.
!
# ifdef DISTRIBUTE
!>      CALL mp_exchange3d (ng, model, 1, Istr, Iend, Jstr, Jend,       &
!>   &                      ILB, IUB, JLB, JUB, LBk, UBk,               &
!>   &                      Nghost, EWperiodic, NSperiodic,             &
!>   &                      tl_Awrk(:,:,:,Nnew))
!>
        CALL ad_mp_exchange3d (ng, model, 1, Istr, Iend, Jstr, Jend,    &
     &                         ILB, IUB, JLB, JUB, LBk, UBk,            &
     &                         Nghost, EWperiodic, NSperiodic,          &
     &                         ad_Awrk(:,:,:,Nnew))
# endif
!>      CALL bc_v3d_tile (ng, Istr, Iend, Jstr, Jend,                   &
!>   &                    ILB, IUB, JLB, JUB, LBk, UBk,                 &
!>   &                    tl_Awrk(:,:,:,Nnew))
!>
        CALL ad_bc_v3d_tile (ng, Istr, Iend, Jstr, Jend,                &
     &                       ILB, IUB, JLB, JUB, LBk, UBk,              &
     &                       ad_Awrk(:,:,:,Nnew))
!
!  Time-step adjoint horizontal diffusion terms.
!
        DO k=1,N(ng)
          DO j=JstrV,Jend
            DO i=Istr,Iend
!>            tl_Awrk(i,j,k,Nnew)=tl_Awrk(i,j,k,Nold)+                  &
!>   &                            Hfac(i,j)*                            &
!>   &                            (tl_FX(i+1,j)-tl_FX(i,j)+             &
!>   &                             tl_FE(i,j)-tl_FE(i,j-1))
!>
              adfac=Hfac(i,j)*ad_Awrk(i,j,k,Nnew)
              ad_FE(i,j-1)=ad_FE(i,j-1)-adfac
              ad_FE(i,j  )=ad_FE(i,j  )+adfac
              ad_FX(i  ,j)=ad_FX(i  ,j)-adfac
              ad_FX(i+1,j)=ad_FX(i+1,j)+adfac
              ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+                  &
     &                            ad_Awrk(i,j,k,Nnew)
              ad_Awrk(i,j,k,Nnew)=0.0_r8
            END DO
          END DO
!
!  Compute XI- and ETA-components of diffusive flux.
!
          DO j=JstrV-1,Jend
            DO i=Istr,Iend
!>            tl_FE(i,j)=pnom_r(i,j)*Kh(i,j)*                           &
!>   &                   (tl_Awrk(i,j+1,k,Nold)-tl_Awrk(i,j,k,Nold))
!>
              adfac=pnom_r(i,j)*Kh(i,j)*ad_FE(i,j)
              ad_Awrk(i,j  ,k,Nold)=ad_Awrk(i,j,k,Nold)-adfac
              ad_Awrk(i,j+1,k,Nold)=ad_Awrk(i,j+1,k,Nold)+adfac
              ad_FE(i,j)=0.0_r8
            END DO
          END DO
          DO j=JstrV,Jend
            DO i=Istr,Iend+1
# ifdef MASKING
!>            tl_FX(i,j)=tl_FX(i,j)*pmask(i,j)
!>
              ad_FX(i,j)=ad_FX(i,j)*pmask(i,j)
# endif
!>            tl_FX(i,j)=pmon_p(i,j)*0.25_r8*(Kh(i-1,j  )+Kh(i,j  )+    &
!>   &                                        Kh(i-1,j-1)+Kh(i,j-1))*   &
!>   &                   (tl_Awrk(i,j,k,Nold)-tl_Awrk(i-1,j,k,Nold))
!>
              adfac=pmon_p(i,j)*0.25_r8*(Kh(i-1,j  )+Kh(i,j  )+         &
     &                                   Kh(i-1,j-1)+Kh(i,j-1))*        &
     &              ad_FX(i,j)
              ad_Awrk(i-1,j,k,Nold)=ad_Awrk(i-1,j,k,Nold)-adfac
              ad_Awrk(i  ,j,k,Nold)=ad_Awrk(i  ,j,k,Nold)+adfac
              ad_FX(i,j)=0.0_r8
            END DO
          END DO
        END DO
      END DO
!
!-----------------------------------------------------------------------
!  Integerate adjoint vertical diffusion term.
!-----------------------------------------------------------------------
!
      DO step=1,NVsteps
!
!  Update integration indices.
!
        Nsav=Nnew
        Nnew=Nold
        Nold=Nsav

        DO j=JstrV-1,Jend+1
!
!  Time-step vertical diffusive term. Notice that "oHz" is assumed to
!  be time invariant.
!
          DO k=1,N(ng)
            DO i=Istr-1,Iend+1
!>             tl_Awrk(i,j,k,Nnew)=tl_Awrk(i,j,k,Nold)+                 &
!>   &                             oHz(i,j,k)*(tl_FS(i,k  )-            &
!>   &                                         tl_FS(i,k-1))
!>
              adfac=oHz(i,j,k)*ad_Awrk(i,j,k,Nnew)
              ad_FS(i,k-1)=ad_FS(i,k-1)-adfac
              ad_FS(i,k  )=ad_FS(i,k  )+adfac
              ad_Awrk(i,j,k,Nold)=ad_Awrk(i,j,k,Nold)+                  &
     &                            ad_Awrk(i,j,k,Nnew)
              ad_Awrk(i,j,k,Nnew)=0.0_r8
            END DO
          END DO
!
!  Compute vertical diffusive flux.  Notice that "Vfac" is assumed to
!  be time invariant.
!
          DO i=Istr-1,Iend+1
!>          tl_FS(i,N(ng))=0.0_r8
!>
            ad_FS(i,N(ng))=0.0_r8
!>          tl_FS(i,0)=0.0_r8
!>
            ad_FS(i,0)=0.0_r8
          END DO
          DO k=1,N(ng)-1
            DO i=Istr-1,Iend+1
!>            tl_FS(i,k)=Vfac(i,j,k)*(tl_Awrk(i,j,k+1,Nold)-            &
!>   &                                tl_Awrk(i,j,k  ,Nold))
!>
              adfac=Vfac(i,j,k)*ad_FS(i,k)
              ad_Awrk(i,j,k  ,Nold)=ad_Awrk(i,j,k  ,Nold)-adfac
              ad_Awrk(i,j,k+1,Nold)=ad_Awrk(i,j,k+1,Nold)+adfac
              ad_FS(i,k)=0.0_r8
            END DO
          END DO
        END DO
      END DO
!
!  Set adjoint initial conditions.
!
      DO k=1,N(ng)
        DO j=JstrV-1,Jend+1
          DO i=Istr-1,Iend+1
!>          tl_Awrk(i,j,k,Nold)=tl_A(i,j,k)
!>
            ad_A(i,j,k)=ad_A(i,j,k)+ad_Awrk(i,j,k,Nold)
            ad_Awrk(i,j,k,Nold)=0.0_r8
          END DO
        END DO
      END DO

      RETURN
      END SUBROUTINE ad_bcov_v3d_tile
#endif
      END MODULE ad_bcov_3d_mod
