#include "cppdefs.h"
#ifdef DISTRIBUTE

# undef  COLLECT_ALLGATHER /* use mpi_allgather in mp_collect */
# define REDUCE_ALLGATHER  /* use mpi_allgather in mp_reduce  */
# undef  REDUCE_ALLREDUCE  /* use mpi_allreduce in mp_reduce  */
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  These routines are used for distrubuted-memory communications       !
!  between parallel nodes:                                             !
!                                                                      !
!  ad_mp_exchange2d  adjoint exchange of I,J tile ghost-points         !
!  ad_mp_exchange3d  adjoint exchange of I,J tile ghost-points         !
!  ad_mp_exchange4d  adjoint exchange of I,J tile ghost-points         !
!  mp_barrier        barrier sychronization                            !
!  mp_bcastf         broadcast floating point variables                !
!  mp_bcasti         broadcast integer variables                       !
!  mp_bcastl         broadcast local variable                          !
!  mp_bcasts         broadcast character variables                     !
!  mp_boundary       exchange boundary data between tiles              !
!  mp_collect        collect 1D vector data from tiles                 !
!  mp_dump           writes 2D and 3D tiles arrays for debugging       !
!  mp_exchange2d     exchange of I,J tile ghost-points                 !
!  mp_exchange3d     exchange of I,J tile ghost-points                 !
!  mp_exchange4d     exchange of I,J tile ghost-points                 !
!  mp_gather         collect I,J tiled 2D and 3D for output purposes   !
!  mp_gather_state   collect state vector for unpacking of variables   !
!  mp_ncread         read in state vector/matrix from NetCDF file      ! 
!  mp_ncwrite        write out state vector/matrix into NetCDF file    !
!  mp_reduce         global reduction operations                       !
!  mp_scatter        scatter input data to 2D and 3D I,J tiled arrays  !
!  mp_scatter_state  scatter global data for packing of state vector   !
!                                                                      !
!=======================================================================
!
      SUBROUTINE mp_barrier (ng)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine blocks the caller until all group members have called  !
!  it.                                                                 !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng
!
!  Local variable declarations.
!
      integer :: MyError
!
!-----------------------------------------------------------------------
!  Synchronize all distribute-memory nodes in the group.
!-----------------------------------------------------------------------
!
# ifdef MPI
      CALL mpi_barrier (OCN_COMM_WORLD, MyError)
# endif

      RETURN
      END SUBROUTINE mp_barrier

      SUBROUTINE mp_bcastf (ng, model, A, Asize)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine broadcasts a floating-point variable to all processors !
!  in the group. It is called by all the members in the group.         !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     A          Variable to broadcast.                                !
!     Asize      Number of entries to broadcast.                       !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Broadcasted variable.                                 !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Asize

      real(r8), intent(inout) :: A(Asize)
!
!  Local variable declarations
!
      integer :: Lstr, MyError

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 40)
# endif
!
!-----------------------------------------------------------------------
!  Broadcast requested variable.
!-----------------------------------------------------------------------
!
# ifdef MPI
      CALL mpi_bcast (A, Asize, MP_FLOAT, MyMaster, OCN_COMM_WORLD,     &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
 10     FORMAT (/,' MP_BCASTF - error during ',a,' call, Node = ',i3.3, &
     &          ' Error = ',i3,/,13x,a)
        exit_flag=2
        RETURN
      END IF
# endif
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 40)
# endif

      RETURN
      END SUBROUTINE mp_bcastf

      SUBROUTINE mp_bcasti (ng, model, A, Asize)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine broadcasts an integer variable to all processors in    !
!  the group. It is called by all the members in the group.            !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     A          Variable to broadcast.                                !
!     Asize      Number of entries to broadcast.                       !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Broadcasted variable.                                 !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Asize

      integer, intent(inout) :: A(Asize)
!
!  Local variable declarations
!
      integer :: Lstr, MyError

      character (len=80) :: string
      
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 40)
# endif
!
!-----------------------------------------------------------------------
!  Broadcast requested variable.
!-----------------------------------------------------------------------
!
# ifdef MPI
      CALL mpi_bcast (A, Asize, MPI_INTEGER, MyMaster, OCN_COMM_WORLD,  &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
 10     FORMAT (/,' MP_BCASTI - error during ',a,' call, Node = ',i3.3, &
     &          ' Error = ',i3,/,13x,a)
        exit_flag=2
        RETURN
      END IF
# endif
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 40)
# endif

      RETURN
      END SUBROUTINE mp_bcasti

      SUBROUTINE mp_bcastl (ng, model, A, Asize)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine broadcasts a logical variable to all processors in     !
!  the group. It is called by all the members in the group.            !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     A          Variable to broadcast.                                !
!     Asize      Number of entries to broadcast.                       !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Broadcasted variable.                                 !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Asize

      logical, intent(inout) :: A(Asize)
!
!  Local variable declarations
!
      integer :: Lstr, MyError

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 40)
# endif
!
!-----------------------------------------------------------------------
!  Broadcast requested variable.
!-----------------------------------------------------------------------
!
# ifdef MPI
      CALL mpi_bcast (A, Asize, MPI_LOGICAL, MyMaster, OCN_COMM_WORLD,  &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
 10     FORMAT (/,' MP_BCASTL - error during ',a,' call, Node = ',i3.3, &
     &          ' Error = ',i3,/,13x,a)
        exit_flag=2
        RETURN
      END IF
# endif
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 40)
# endif

      RETURN
      END SUBROUTINE mp_bcastl

      SUBROUTINE mp_bcasts (ng, model, A, Asize)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine broadcasts a string variable to all processors in the  !
!  group. It is called by all the members in the group.                !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     A          Variable to broadcast.                                !
!     Asize      Number of entries to broadcast.                       !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Broadcasted variable.                                 !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Asize

      character (len=*), intent(inout) :: A
!
!  Local variable declarations
!
      integer :: Lstr, MyError

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 40)
# endif
!
!-----------------------------------------------------------------------
!  Broadcast requested variable.
!-----------------------------------------------------------------------
!
# ifdef MPI
      CALL mpi_bcast (A, Asize, MPI_BYTE, MyMaster, OCN_COMM_WORLD,     &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
 10     FORMAT (/,' MP_BCASTS - error during ',a,' call, Node = ',i3.3, &
     &          ' Error = ',i3,/,13x,a)
        exit_flag=2
        RETURN
      END IF
# endif
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 40)
# endif

      RETURN
      END SUBROUTINE mp_bcasts

      SUBROUTINE mp_boundary (ng, model, Imin, Imax,                    &
     &                        LBi, UBi, LBk, UBk,                       &
     &                        update, A)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine exchanges boundary arrays between tiles.               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Imin       Starting tile index.                                  !
!     Imax       Ending   tile index.                                  !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBk        K-dimension Lower bound, if any. Otherwise, a value   !
!                  of one is expected.                                 !
!     LBk        K-dimension Upper bound, if any. Otherwise, a value   !
!                  of one is expected.                                 !
!     UBk        K-dimension Upper bound.                              !
!     update     Switch activated by the node that updated the         !
!                  boundary data.                                      !
!     A          Boundary array (1D or 2D) to process.                 !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Updated boundary array (1D or 2D).                    !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      logical, intent(in) :: update

      integer, intent(in) :: ng, model, Imin, Imax
      integer, intent(in) :: LBi, UBi, LBk, UBk

      real(r8), intent(inout) :: A(LBi:UBi,LBk:UBk)
!
!  Local variable declarations.
!
      integer :: Ilen, Ioff, Lstr, MyError, Nnodes, Npts, i, ik, k, kc
      integer :: rank

      real(r8), dimension(TileSize(ng)) :: Asend

      real(r8), dimension(TileSize(ng),                                 &
     &                    0:NtileI(ng)*NtileJ(ng)-1) :: Arecv

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 44)
# endif
!
!-----------------------------------------------------------------------
!  Pack boundary data.  Zero-out boundary array except points updated
!  by the appropriate node, so sum reduction can be perfomed during
!  unpacking.
!-----------------------------------------------------------------------
!
      Ilen=UBi-LBi+1
      Ioff=1-LBi
      Npts=Ilen*(UBk-LBk+1)
      Asend(1:Npts)=0.0_r8
      IF (update) THEN
        DO k=LBk,UBk
          kc=(k-LBk)*Ilen
          DO i=Imin,Imax
            ik=i+Ioff+kc
            Asend(ik)=A(i,k)
          END DO
        END DO
      END IF
!
!-----------------------------------------------------------------------
!  Collect data from all nodes.
!-----------------------------------------------------------------------
!
# ifdef MPI
      CALL mpi_allgather (Asend, Npts, MP_FLOAT, Arecv, Npts, MP_FLOAT, &
     &                    OCN_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_ALLGATHER', MyRank, MyError,             &
     &                    string(1:Lstr)
 10     FORMAT (/,' MP_BOUNDARY - error during ',a,' call, Node = ',    &
     &          i3.3,' Error = ',i3,/,15x,a)
        exit_flag=2
        RETURN
      END IF
# endif
!
!-----------------------------------------------------------------------
!  Unpack data: reduction sum.
!-----------------------------------------------------------------------
!
      Nnodes=NtileI(ng)*NtileJ(ng)-1
      ik=0
      DO k=LBk,UBk
        DO i=LBi,UBi
          A(i,k)=0.0_r8
          ik=ik+1
          DO rank=0,Nnodes
            A(i,k)=A(i,k)+Arecv(ik,rank)
          END DO
        END DO
      END DO
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 44)
# endif

      RETURN
      END SUBROUTINE mp_boundary

      SUBROUTINE mp_collect (ng, model, Npts, Aspv, A)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine collects requested buffer from all members in the      !
!  group. Then, it packs distributed data by removing the special      !
!  values. This routine is used when extracting station data from      !
!  tiled arrays.                                                       !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Npts       Number of extracted data points.                      !
!     Aspv       Special value indicating no data.  This implies that  !
!                  desired data is tile unbouded.                      !
!     A          Extracted data.                                       !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Collected data.                                       !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Npts

      real(r8), intent(in) :: Aspv

      real(r8), intent(inout) :: A(Npts)
!
!  Local variable declarations.
!
      integer :: Lstr, MyError, Nnodes, i, rank, request

      integer, dimension(MPI_STATUS_SIZE) :: status

# ifdef COLLECT_ALLGATHER
      real(r8), dimension(Npts,0:NtileI(ng)*NtileJ(ng)-1) :: Arecv
# else
      real(r8), allocatable :: Arecv(:)
# endif

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 45)
# endif
!
!-----------------------------------------------------------------------
!  Collect data from all nodes.
!-----------------------------------------------------------------------
!
# if defined COLLECT_ALLGATHER
      CALL mpi_allgather (A, Npts, MP_FLOAT, Arecv, Npts, MP_FLOAT,     &
     &                    OCN_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_ALLGATHER', MyRank, MyError,             &
     &                    string(1:Lstr)
        exit_flag=2
        RETURN
      END IF
!
!  Pack data according to special values: sum or ignore.
!
      Nnodes=NtileI(ng)*NtileJ(ng)-1
      IF (Aspv.eq.0.0_r8) THEN
        DO i=1,Npts
          A(i)=0.0_r8
          DO rank=0,Nnodes
            A(i)=A(i)+Arecv(i,rank)
          END DO
        END DO
      ELSE
        DO i=1,Npts
          DO rank=0,Nnodes
            IF (Arecv(i,rank).ne.Aspv) THEN
              A(i)=Arecv(i,rank)
            END IF
          END DO
        END DO
      END IF
# else
      IF (MyRank.eq.MyMaster) THEN
!
!  If master node, allocate and receive buffer.
!
        IF (.not.allocated(Arecv)) THEN
          allocate (Arecv(Npts))
        END IF
!
!  If master node, loop over other nodes to receive and accumulate the
!  data.
!
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          CALL mpi_irecv (Arecv, Npts, MP_FLOAT, rank, rank+5,          &
     &                    OCN_COMM_WORLD, request, MyError)
          CALL mpi_wait (request, status, MyError)
          IF (MyError.ne.MPI_SUCCESS) THEN
            CALL mpi_error_string (MyError, string, LEN(string),        &
     &                             MyError)
            Lstr=LEN_TRIM(string)
            WRITE (stdout,10) 'MPI_IRECV', rank, MyError, string(1:Lstr)
            exit_flag=2
            RETURN
          END IF
          DO i=1,Npts
            A(i)=A(i)+Arecv(i)
          END DO
        END DO
        deallocate (Arecv)
!
!  Otherwise, send data to master node.
!
      ELSE
        CALL mpi_isend (A, Npts, MP_FLOAT, MyMaster, MyRank+5,          &
     &                  OCN_COMM_WORLD, request, MyError)
        CALL mpi_wait (request, status, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_ISEND', MyRank, MyError, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
      END IF
!
!  Broadcast accumulated (full) data to all nodes.
!
      CALL mpi_bcast (A, Npts, MP_FLOAT, MyMaster, OCN_COMM_WORLD,      &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
        exit_flag=2
        RETURN
      END IF
# endif
 10   FORMAT (/,' MP_COLLECT - error during ',a,' call, Node = ',       &
     &        i3.3,' Error = ',i3,/,14x,a)

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 45)
# endif

      RETURN
      END SUBROUTINE mp_collect

      SUBROUTINE mp_exchange2d (ng, model, Nvar,                        &
     &                          Istr, Iend, Jstr, Jend,                 &
     &                          LBi, UBi, LBj, UBj,                     &
     &                          Nghost, EWperiodic, NSperiodic,         &
     &                          A, B, C, D)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================ Enrique Curchitser ===
!                                                                      !
!  This routine updates the tile overlap halo of  NV 2D variables.     !
!  It exchanges the specified number of "ghost-points".  In  order     !
!  to minimize the number send and receive calls, the ghost-points     !
!  are included in the buffers.  Therefore, the order of the pack,     !
!  send, receive, and unpack is crucial.                               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nvar       Number of variables for aggregated exchanges.         !
!     Istr       Starting tile index in the I-direction.               !
!     Iend       Ending   tile index in the I-direction.               !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     Nghost     Number of ghost-points in the halo region.            !
!     EWperiodic Switch indicating EW periodicity exchanges.           !
!     NSperiodic Switch indicating NS periodicity exchanges.           !
!     A          2D tiled array to process.                            !
!     B          2D tiled array (optional) to process.                 !
!     C          2D tiled array (optional) to process.                 !
!     D          2D tiled array (optional) to process.                 !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Updated 2D tiled array.                               !
!     B          Updated 2D tiled array (optional).                    !
!     C          Updated 2D tiled array (optional).                    !
!     D          Updated 2D tiled array (optional).                    !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      logical, intent(in) :: EWperiodic, NSperiodic

      integer, intent(in) :: ng, model, Nvar
      integer, intent(in) :: Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj
      integer, intent(in) :: Nghost

      real(r8), intent(inout) :: A(LBi:UBi,LBj:UBj)

      real(r8), intent(inout), optional :: B(LBi:UBi,LBj:UBj)
      real(r8), intent(inout), optional :: C(LBi:UBi,LBj:UBj)
      real(r8), intent(inout), optional :: D(LBi:UBi,LBj:UBj)
!
!  Local variable declarations.
!
      logical :: Wexchange, Sexchange, Eexchange, Nexchange
      logical :: processB, processC, processD

      integer :: i, icS, icN, ioff, Imin, Imax, Ilen
      integer :: j, jcW, jcE, joff, Jmin, Jmax, Jlen
      integer :: m, mc, Lstr, MyRankI, MyRankJ, rank
      integer :: Wtile, GsendW, GrecvW, Wtag, Werror, Wrequest
      integer :: Stile, GsendS, GrecvS, Stag, Serror, Srequest
      integer :: Etile, GsendE, GrecvE, Etag, Eerror, Erequest
      integer :: Ntile, GsendN, GrecvN, Ntag, Nerror, Nrequest
      integer :: EWsize, sizeW, sizeE
      integer :: NSsize, sizeS, sizeN
      integer :: Null_Value

      integer, dimension(-1:NtileI(ng),-1:NtileJ(ng)) :: table
# ifdef MPI
      integer, dimension(MPI_STATUS_SIZE,4) :: status
# endif

# if defined EW_PERIODIC || defined NS_PERIODIC
      integer, parameter :: pp = 1
# else
      integer, parameter :: pp = 0
# endif
      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendW
      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendE
      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendS
      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendN

      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvW
      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvE
      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvS
      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvN

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 39)
# endif
!
!-----------------------------------------------------------------------
!  Send and recive tile boundary data.
!-----------------------------------------------------------------------
!
!  Set tile partition table for looking up adjacent processes. Notice
!  that a null value is used in places that data transmition is not
!  required.
!
# if defined MPI
      Null_Value=MPI_PROC_NULL
# else
      Null_Value=-1
# endif
      DO j=-1,NtileJ(ng)
        DO i=-1,NtileI(ng)
          table(i,j)=Null_Value
        END DO
      END DO
      rank=0
      DO j=0,NtileJ(ng)-1
        DO i=0,NtileI(ng)-1
          table(i,j)=rank
          IF (MyRank.eq.rank) THEN
            MyRankI=i
            MyRankJ=j
          END IF
          rank=rank+1            
        END DO
      END DO
!
!  Set boundary communication tags and exchange switches.
!
      Wtag=1
      Stag=2
      Etag=3
      Ntag=4
      Wexchange=.FALSE.
      Sexchange=.FALSE.
      Eexchange=.FALSE.
      Nexchange=.FALSE.
!
!  Set swiches to process additional variables for aggregations.
!
      processB=.FALSE.
      processC=.FALSE.
      processD=.FALSE.
      IF (Nvar.gt.1) processB=.TRUE.
      IF (Nvar.gt.2) processC=.TRUE.
      IF (Nvar.gt.3) processD=.TRUE.
!
!-----------------------------------------------------------------------
!  Process Western and Eastern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Jmin=LBj
      Jmax=UBj
      Jlen=Jmax-Jmin+1
      EWsize=Nvar*(Nghost+pp)*Jlen
!
!  Determine the rank of Western and Eastern tiles.  Then, determine
!  the number of ghost-points to send and receive in the West- and
!  East-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2       
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2   Lm+3
!                      -2    -1     0    1     2      3
!
      IF (EWperiodic) THEN
        IF ((table(MyRankI-1,MyRankJ).eq.Null_Value).and.               &
     &      (NtileI(ng).gt.1)) THEN
          Wtile=table(NtileI(ng)-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
# if defined THREE_GHOST
          GrecvW=Nghost
# else
          GrecvW=Nghost+1
# endif
          GrecvE=Nghost
        ELSE IF ((table(MyRankI+1,MyRankJ).eq.Null_Value).and.          &
     &           (NtileI(ng).gt.1)) THEN
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(0,MyRankJ)
          GsendW=Nghost
# if defined THREE_GHOST
          GsendE=Nghost
# else
          GsendE=Nghost+1
# endif
          GrecvW=Nghost
          GrecvE=Nghost
        ELSE        
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
          GrecvW=Nghost
          GrecvE=Nghost
        END IF
      ELSE
        Wtile=table(MyRankI-1,MyRankJ)
        Etile=table(MyRankI+1,MyRankJ)
        GsendW=Nghost
        GsendE=Nghost
        GrecvW=Nghost
        GrecvE=Nghost
      END IF
      IF (Wtile.ne.Null_Value) Wexchange=.TRUE.
      IF (Etile.ne.Null_Value) Eexchange=.TRUE.
!
!  Pack tile boundary data including ghost-points.
!
      IF (Wexchange) THEN      
        sizeW=0
        DO m=1,GsendW
          mc=(m-1)*Jlen
          DO j=Jmin,Jmax
            sizeW=sizeW+1
            jcW=1+(j-Jmin)+mc
            AsendW(jcW)=A(Istr+m-1,j)
          END DO
        END DO
        IF (processB) THEN
          joff=jcW
          DO m=1,GsendW
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jcW=joff+1+(j-Jmin)+mc
              AsendW(jcW)=B(Istr+m-1,j)
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jcW
          DO m=1,GsendW
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jcW=joff+1+(j-Jmin)+mc
              AsendW(jcW)=C(Istr+m-1,j)
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jcW
          DO m=1,GsendW
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jcW=joff+1+(j-Jmin)+mc
              AsendW(jcW)=D(Istr+m-1,j)
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN     
        sizeE=0
        DO m=1,GsendE
          mc=(m-1)*Jlen
          DO j=Jmin,Jmax
            sizeE=sizeE+1
            jcE=1+(j-Jmin)+mc
            AsendE(jcE)=A(Iend-GsendE+m,j)
          END DO
        END DO
        IF (processB) THEN
          joff=jcE
          DO m=1,GsendE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jcE=joff+1+(j-Jmin)+mc
              AsendE(jcE)=B(Iend-GsendE+m,j)
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jcE
          DO m=1,GsendE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jcE=joff+1+(j-Jmin)+mc
              AsendE(jcE)=C(Iend-GsendE+m,j)
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jcE
          DO m=1,GsendE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jcE=joff+1+(j-Jmin)+mc
              AsendE(jcE)=D(Iend-GsendE+m,j)
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Send and receive Western and Eastern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Wexchange) THEN
        CALL mpi_irecv (ArecvW, EWsize, MP_FLOAT, Wtile, Etag,          &
     &                  OCN_COMM_WORLD, Wrequest, Werror)
      END IF
      IF (Eexchange) THEN
        CALL mpi_irecv (ArecvE, EWsize, MP_FLOAT, Etile, Wtag,          &
     &                  OCN_COMM_WORLD, Erequest, Eerror)
      END IF
      IF (Wexchange) THEN
        CALL mpi_send  (AsendW, sizeW, MP_FLOAT, Wtile, Wtag,           &
     &                  OCN_COMM_WORLD, Werror)
      END IF
      IF (Eexchange) THEN
        CALL mpi_send  (AsendE, sizeE, MP_FLOAT, Etile, Etag,           &
     &                  OCN_COMM_WORLD, Eerror)
      END IF
# endif
!
!-----------------------------------------------------------------------
!  Unpack Western and Eastern segments.
!-----------------------------------------------------------------------
!
      IF (Wexchange) THEN
# ifdef MPI
        CALL mpi_wait (Wrequest, status(1,1), Werror)
        IF (Werror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Werror, string, LEN(string), Werror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Western Edge)',        &
     &                      MyRank, Werror, string(1:Lstr)
 10       FORMAT (/,' MP_EXCHANGE2D - error during ',a,                 &
     &            ' call, Node = ',i3.3,' Error = ',i3,/,15x,a)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=GrecvW,1,-1
          mc=(GrecvW-m)*Jlen
          DO j=Jmin,Jmax
            jcW=1+(j-Jmin)+mc
            A(Istr-m,j)=ArecvW(jcW)
          END DO
        END DO
        IF (processB) THEN
          joff=jcW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*Jlen
            DO j=Jmin,Jmax
              jcW=joff+1+(j-Jmin)+mc
              B(Istr-m,j)=ArecvW(jcW)
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jcW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*Jlen
            DO j=Jmin,Jmax
              jcW=joff+1+(j-Jmin)+mc
              C(Istr-m,j)=ArecvW(jcW)
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jcW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*Jlen
            DO j=Jmin,Jmax
              jcW=joff+1+(j-Jmin)+mc
              D(Istr-m,j)=ArecvW(jcW)
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
# ifdef MPI
        CALL mpi_wait (Erequest, status(1,3), Eerror)
        IF (Eerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Eerror, string, LEN(string), Eerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Eastern Edge)',        &
     &                      MyRank, Eerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GrecvE
          mc=(m-1)*Jlen
          DO j=Jmin,Jmax
            jcE=1+(j-Jmin)+mc
            A(Iend+m,j)=ArecvE(jcE)
          ENDDO
        END DO
        IF (processB) THEN
          joff=jcE
          DO m=1,GrecvE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcE=joff+1+(j-Jmin)+mc
              B(Iend+m,j)=ArecvE(jcE)
            ENDDO
          END DO
        END IF
        IF (processC) THEN
          joff=jcE
          DO m=1,GrecvE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcE=joff+1+(j-Jmin)+mc
              C(Iend+m,j)=ArecvE(jcE)
            ENDDO
          END DO
        END IF
        IF (processD) THEN
          joff=jcE
          DO m=1,GrecvE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcE=joff+1+(j-Jmin)+mc
              D(Iend+m,j)=ArecvE(jcE)
            ENDDO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Process Southern and Northern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Imin=LBi
      Imax=UBi
      Ilen=Imax-Imin+1
      NSsize=Nvar*(Nghost+pp)*Ilen
!
!  Determine the rank of Southern and Northern tiles.  Then, determine
!  the number of ghost-points to send and receive in the South- and
!  North-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2  Mm+3
!                      -2    -1     0    1     2     3
!
      IF (NSperiodic) THEN
        IF ((table(MyRankI,MyRankJ-1).eq.Null_Value).and.               &
     &      (NtileJ(ng).gt.1)) THEN
          Stile=table(MyRankI,NtileJ(ng)-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
# if defined THREE_GHOST
          GrecvS=Nghost
# else
          GrecvS=Nghost+1
# endif
          GrecvN=Nghost
        ELSE IF ((table(MyRankI,MyRankJ+1).eq.Null_Value).and.          &
     &           (NtileJ(ng).gt.1)) then
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,0)
          GsendS=Nghost
# if defined THREE_GHOST
          GsendN=Nghost
# else
          GsendN=Nghost+1
# endif
          GrecvS=Nghost
          GrecvN=Nghost
        ELSE
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
          GrecvS=Nghost
          GrecvN=Nghost
        END IF
      ELSE
        Stile=table(MyRankI,MyRankJ-1)
        Ntile=table(MyRankI,MyRankJ+1)
        GsendS=Nghost
        GsendN=Nghost
        GrecvS=Nghost
        GrecvN=Nghost
      END IF
      IF (Stile.ne.Null_Value) Sexchange=.TRUE.
      IF (Ntile.ne.Null_Value) Nexchange=.TRUE.
!
!  Pack tile boundary data including ghost-points.
!
      IF (Sexchange) THEN
        sizeS=0
        DO m=1,GsendS
          mc=(m-1)*Ilen
          DO i=Imin,Imax
            sizeS=sizeS+1
            icS=1+(i-Imin)+mc
            AsendS(icS)=A(i,Jstr+m-1)
          END DO
        END DO
        IF (processB) THEN
          ioff=icS
          DO m=1,GsendS
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              icS=ioff+1+(i-Imin)+mc
              AsendS(icS)=B(i,Jstr+m-1)
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icS
          DO m=1,GsendS
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              icS=ioff+1+(i-Imin)+mc
              AsendS(icS)=C(i,Jstr+m-1)
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icS
          DO m=1,GsendS
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              icS=ioff+1+(i-Imin)+mc
              AsendS(icS)=D(i,Jstr+m-1)
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
        sizeN=0
        DO m=1,GsendN
          mc=(m-1)*Ilen
          DO i=Imin,Imax
            sizeN=sizeN+1
            icN=1+(i-Imin)+mc
            AsendN(icN)=A(i,Jend-GsendN+m)
          END DO
        END DO
        IF (processB) THEN
          ioff=icN
          DO m=1,GsendN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              icN=ioff+1+(i-Imin)+mc
              AsendN(icN)=B(i,Jend-GsendN+m)
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icN
          DO m=1,GsendN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              icN=ioff+1+(i-Imin)+mc
              AsendN(icN)=C(i,Jend-GsendN+m)
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icN
          DO m=1,GsendN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              icN=ioff+1+(i-Imin)+mc
              AsendN(icN)=D(i,Jend-GsendN+m)
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Send and receive Southern and Northern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Sexchange) THEN
        CALL mpi_irecv (ArecvS, NSsize, MP_FLOAT, Stile, Ntag,          &
     &                  OCN_COMM_WORLD, Srequest, Serror)
      END IF
      IF (Nexchange) THEN
        CALL mpi_irecv (ArecvN, NSsize, MP_FLOAT, Ntile, Stag,          &
     &                  OCN_COMM_WORLD, Nrequest, Nerror)
      END IF
      IF (Sexchange) THEN
        CALL mpi_send  (AsendS, sizeS, MP_FLOAT, Stile, Stag,           &
     &                  OCN_COMM_WORLD, Serror)
      END IF
      IF (Nexchange) THEN
        CALL mpi_send  (AsendN, sizeN, MP_FLOAT, Ntile, Ntag,           &
     &                  OCN_COMM_WORLD, Nerror)
      END IF
# endif
!
!-----------------------------------------------------------------------
!  Unpack Northern and Southern segments.
!-----------------------------------------------------------------------
!
      IF (Sexchange) THEN
# ifdef MPI
        CALL mpi_wait (Srequest, status(1,2), Serror)
        IF (Serror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Serror, string, LEN(string), Serror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Southern Edge)',       &
     &                      MyRank, Serror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=GrecvS,1,-1
          mc=(GrecvS-m)*Ilen     
          DO i=Imin,Imax
            icS=1+(i-Imin)+mc
            A(i,Jstr-m)=ArecvS(icS)
          END DO
        END DO
        IF (processB) THEN
          ioff=icS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*Ilen     
            DO i=Imin,Imax
              icS=ioff+1+(i-Imin)+mc
              B(i,Jstr-m)=ArecvS(icS)
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*Ilen     
            DO i=Imin,Imax
              icS=ioff+1+(i-Imin)+mc
              C(i,Jstr-m)=ArecvS(icS)
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*Ilen     
            DO i=Imin,Imax
              icS=ioff+1+(i-Imin)+mc
              D(i,Jstr-m)=ArecvS(icS)
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
# ifdef MPI
        CALL mpi_wait (Nrequest, status(1,4), Nerror)
        IF (Nerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Nerror, string, LEN(string), Nerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Northern Edge)',       &
     &                      MyRank, Nerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GrecvN
          mc=(m-1)*Ilen
          DO i=Imin,Imax
            icN=1+(i-Imin)+mc
            A(i,Jend+m)=ArecvN(icN)
          END DO
        END DO
        IF (processB) THEN
          ioff=icN
          DO m=1,GrecvN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icN=ioff+1+(i-Imin)+mc
              B(i,Jend+m)=ArecvN(icN)
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icN
          DO m=1,GrecvN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icN=ioff+1+(i-Imin)+mc
              C(i,Jend+m)=ArecvN(icN)
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icN
          DO m=1,GrecvN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icN=ioff+1+(i-Imin)+mc
              D(i,Jend+m)=ArecvN(icN)
            END DO
          END DO
        END IF
      END IF
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 39)
# endif

      RETURN
      END SUBROUTINE mp_exchange2d

      SUBROUTINE mp_exchange3d (ng, model, Nvar,                        &
     &                          Istr, Iend, Jstr, Jend,                 &
     &                          LBi, UBi, LBj, UBj, LBk, UBk,           &
     &                          Nghost, EWperiodic, NSperiodic,         &
     &                          A, B, C, D)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================ Enrique Curchitser ===
!                                                                      !
!  This routine updates the tile overlap halo of  NV 3D variables.     !
!  It exchanges the specified number of "ghost-points".  In  order     !
!  to minimize the number send and receive calls, the ghost-points     !
!  are included in the buffers.  Therefore, the order of the pack,     !
!  send, receive, and unpack is crucial.                               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nvar       Number of variables for aggregated exchanges.         !
!     Istr       Starting tile index in the I-direction.               !
!     Iend       Ending   tile index in the I-direction.               !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     LBk        K-dimension Lower bound.
!     UBk        K-dimension Upper bound.                              !
!     Nghost     Number of ghost-points in the halo region.            !
!     EWperiodic Switch indicating EW periodicity exchanges.           !
!     NSperiodic Switch indicating NS periodicity exchanges.           !
!     A          3D tiled array to process.                            !
!     B          3D tiled array (optional) to process.                 !
!     C          3D tiled array (optional) to process.                 !
!     D          3D tiled array (optional) to process.                 !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Updated 3D tiled array.                               !
!     B          Updated 3D tiled array (optional).                    !
!     C          Updated 3D tiled array (optional).                    !
!     D          Updated 3D tiled array (optional).                    !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      logical, intent(in) :: EWperiodic, NSperiodic

      integer, intent(in) :: ng, model, Nvar
      integer, intent(in) :: Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk
      integer, intent(in) :: Nghost

      real(r8), intent(inout) :: A(LBi:UBi,LBj:UBj,LBk:UBk)

      real(r8), intent(inout), optional :: B(LBi:UBi,LBj:UBj,LBk:UBk)
      real(r8), intent(inout), optional :: C(LBi:UBi,LBj:UBj,LBk:UBk)
      real(r8), intent(inout), optional :: D(LBi:UBi,LBj:UBj,LBk:UBk)
!
!  Local variable declarations.
!
      logical :: Wexchange, Sexchange, Eexchange, Nexchange
      logical :: processB, processC, processD

      integer :: i, ikS, ikN, ioff, Imin, Imax, Ilen, IKlen
      integer :: j, jkW, jkE, joff, Jmin, Jmax, Jlen, JKlen 
      integer :: k, kc,  m, mc, Klen, Lstr, MyRankI, MyRankJ, rank
      integer :: Wtile, GsendW, GrecvW, Wtag, Werror, Wrequest
      integer :: Stile, GsendS, GrecvS, Stag, Serror, Srequest
      integer :: Etile, GsendE, GrecvE, Etag, Eerror, Erequest
      integer :: Ntile, GsendN, GrecvN, Ntag, Nerror, Nrequest
      integer :: EWsize, sizeW, sizeE
      integer :: NSsize, sizeS, sizeN
      integer :: Null_Value

      integer, dimension(-1:NtileI(ng),-1:NtileJ(ng)) :: table
# ifdef MPI
      integer, dimension(MPI_STATUS_SIZE,4) :: status
# endif

# if defined EW_PERIODIC || defined NS_PERIODIC
      integer, parameter :: pp = 1
# else
      integer, parameter :: pp = 0
# endif

      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendW
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendE
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendS
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendN

      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvW
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvE
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvS
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvN

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 39)
# endif
!
!-----------------------------------------------------------------------
!  Send and recive tile boundary data.
!-----------------------------------------------------------------------
!
!  Set tile partition table for looking up adjacent processes. Notice
!  that a null value is used in places that data transmition is not
!  required.
!
# if defined MPI
      Null_Value=MPI_PROC_NULL
# else
      Null_Value=-1
# endif
      DO j=-1,NtileJ(ng)
        DO i=-1,NtileI(ng)
          table(i,j)=Null_Value
        END DO
      END DO
      rank=0
      DO j=0,NtileJ(ng)-1
        DO i=0,NtileI(ng)-1
          table(i,j)=rank
          IF (MyRank.eq.rank) THEN
            MyRankI=i
            MyRankJ=j
          END IF
          rank=rank+1            
        END DO
      END DO
!
!  Set boundary communication tags and exchange switches.
!
      Wtag=1
      Stag=2
      Etag=3
      Ntag=4
      Wexchange=.FALSE.
      Sexchange=.FALSE.
      Eexchange=.FALSE.
      Nexchange=.FALSE.
!
!  Set swiches to process additional variables for aggregations.
!
      processB=.FALSE.
      processC=.FALSE.
      processD=.FALSE.
      IF (Nvar.gt.1) processB=.TRUE.
      IF (Nvar.gt.2) processC=.TRUE.
      IF (Nvar.gt.3) processD=.TRUE.
!
!-----------------------------------------------------------------------
!  Process Western and Eastern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Jmin=LBj
      Jmax=UBj
      Jlen=Jmax-Jmin+1
      Klen=UBk-LBk+1
      JKlen=Jlen*Klen
      EWsize=Nvar*(Nghost+pp)*JKlen
!
!  Determine the rank of Western and Eastern tiles.  Then, determine
!  the number of ghost-points to send and receive in the West- and
!  East-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2       
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2   Lm+3
!                      -2    -1     0    1     2      3
!
      IF (EWperiodic) THEN
        IF ((table(MyRankI-1,MyRankJ).eq.Null_Value).and.               &
     &      (NtileI(ng).gt.1)) THEN
          Wtile=table(NtileI(ng)-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
# if defined THREE_GHOST
          GrecvW=Nghost
# else
          GrecvW=Nghost+1
# endif
          GrecvE=Nghost
        ELSE IF ((table(MyRankI+1,MyRankJ).eq.Null_Value).and.          &
     &           (NtileI(ng).gt.1)) THEN
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(0,MyRankJ)
          GsendW=Nghost
# if defined THREE_GHOST
          GsendE=Nghost
# else
          GsendE=Nghost+1
# endif
          GrecvW=Nghost
          GrecvE=Nghost
        ELSE        
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
          GrecvW=Nghost
          GrecvE=Nghost
        END IF
      ELSE
        Wtile=table(MyRankI-1,MyRankJ)
        Etile=table(MyRankI+1,MyRankJ)
        GsendW=Nghost
        GsendE=Nghost
        GrecvW=Nghost
        GrecvE=Nghost
      END IF
      IF (Wtile.ne.Null_Value) Wexchange=.TRUE.
      IF (Etile.ne.Null_Value) Eexchange=.TRUE.
!
!  Pack tile boundary data including ghost-points.
!
      IF (Wexchange) THEN
        sizeW=0
        DO m=1,GsendW
          mc=(m-1)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jkW=1+(j-Jmin)+kc+mc
              AsendW(jkW)=A(Istr+m-1,j,k)
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=joff+1+(j-Jmin)+kc+mc
                AsendW(jkW)=B(Istr+m-1,j,k)
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=joff+1+(j-Jmin)+kc+mc
                AsendW(jkW)=C(Istr+m-1,j,k)
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=joff+1+(j-Jmin)+kc+mc
                AsendW(jkW)=D(Istr+m-1,j,k)
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
        sizeE=0
        DO m=1,GsendE
          mc=(m-1)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jkE=1+(j-Jmin)+kc+mc
              AsendE(jkE)=A(Iend-GsendE+m,j,k)
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=joff+1+(j-Jmin)+kc+mc
                AsendE(jkE)=B(Iend-GsendE+m,j,k)
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=joff+1+(j-Jmin)+kc+mc
                AsendE(jkE)=C(Iend-GsendE+m,j,k)
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=joff+1+(j-Jmin)+kc+mc
                AsendE(jkE)=D(Iend-GsendE+m,j,k)
              END DO
            END DO
          END DO
        END IF
      END IF        
!
!-----------------------------------------------------------------------
!  Send and receive Western and Eastern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Wexchange) THEN
        CALL mpi_irecv (ArecvW, EWsize, MP_FLOAT, Wtile, Etag,          &
     &                  OCN_COMM_WORLD, Wrequest, Werror)
      END IF
      IF (Eexchange) THEN
        CALL mpi_irecv (ArecvE, EWsize, MP_FLOAT, Etile, Wtag,          &
     &                  OCN_COMM_WORLD, Erequest, Eerror)
      END IF
      IF (Wexchange) THEN
        CALL mpi_send  (AsendW, sizeW, MP_FLOAT, Wtile, Wtag,           &
     &                  OCN_COMM_WORLD, Werror)
      END IF
      IF (Eexchange) THEN
        CALL mpi_send  (AsendE, sizeE, MP_FLOAT, Etile, Etag,           &
     &                  OCN_COMM_WORLD, Eerror)
      END IF
# endif
!
!-----------------------------------------------------------------------
!  Unpack Eastern and Western segments.
!-----------------------------------------------------------------------
!
      IF (Wexchange) THEN
# ifdef MPI
        CALL mpi_wait (Wrequest, status(1,1), Werror)
        IF (Werror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Werror, string, LEN(string), Werror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Western Edge)',        &
     &                      MyRank, Werror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=GrecvW,1,-1
          mc=(GrecvW-m)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              jkW=1+(j-Jmin)+kc+mc
              A(Istr-m,j,k)=ArecvW(jkW)
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=joff+1+(j-Jmin)+kc+mc
                B(Istr-m,j,k)=ArecvW(jkW)
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=joff+1+(j-Jmin)+kc+mc
                C(Istr-m,j,k)=ArecvW(jkW)
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=joff+1+(j-Jmin)+kc+mc
                D(Istr-m,j,k)=ArecvW(jkW)
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
# ifdef MPI
        CALL mpi_wait (Erequest, status(1,3), Eerror)
        IF (Eerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Eerror, string, LEN(string), Eerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Eastern Edge)',        &
     &                      MyRank, Eerror, string(1:Lstr)
 10       FORMAT (/,' MP_EXCHANGE3D - error during ',a,                 &
     &            ' call, Node = ',i3.3,' Error = ',i3,/,15x,a)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GrecvE
          mc=(m-1)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              jkE=1+(j-Jmin)+kc+mc
              A(Iend+m,j,k)=ArecvE(jkE)
            END DO
          ENDDO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=joff+1+(j-Jmin)+kc+mc
                B(Iend+m,j,k)=ArecvE(jkE)
              END DO
            ENDDO
          END DO
        END IF
        IF (processC) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=joff+1+(j-Jmin)+kc+mc
                C(Iend+m,j,k)=ArecvE(jkE)
              END DO
            ENDDO
          END DO
        END IF
        IF (processD) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=joff+1+(j-Jmin)+kc+mc
                D(Iend+m,j,k)=ArecvE(jkE)
              END DO
            ENDDO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Process Southern and Northern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Imin=LBi
      Imax=UBi
      Ilen=Imax-Imin+1
      Klen=UBk-LBk+1
      IKlen=Ilen*Klen
      NSsize=Nvar*(Nghost+pp)*IKlen
!
!  Determine the rank of Southern and Northern tiles.  Then, determine
!  the number of ghost-points to send and receive in the South- and
!  North-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2  Mm+3
!                      -2    -1     0    1     2     3
!
      IF (NSperiodic) THEN
        IF ((table(MyRankI,MyRankJ-1).eq.Null_Value).and.               &
     &      (NtileJ(ng).gt.1)) THEN
          Stile=table(MyRankI,NtileJ(ng)-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
# if defined THREE_GHOST
          GrecvS=Nghost
# else
          GrecvS=Nghost+1
# endif
          GrecvN=Nghost
        ELSE IF ((table(MyRankI,MyRankJ+1).eq.Null_Value).and.          &
     &           (NtileJ(ng).gt.1)) then
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,0)
          GsendS=Nghost
# if defined THREE_GHOST
          GsendN=Nghost
# else
          GsendN=Nghost+1
# endif
          GrecvS=Nghost
          GrecvN=Nghost
        ELSE
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
          GrecvS=Nghost
          GrecvN=Nghost
        END IF
      ELSE
        Stile=table(MyRankI,MyRankJ-1)
        Ntile=table(MyRankI,MyRankJ+1)
        GsendS=Nghost
        GsendN=Nghost
        GrecvS=Nghost
        GrecvN=Nghost
      END IF
      IF (Stile.ne.Null_Value) Sexchange=.TRUE.
      IF (Ntile.ne.Null_Value) Nexchange=.TRUE.
!
!  Pack tile boundary data including ghost-points.
!
      IF (Sexchange) THEN
        sizeS=0
        DO m=1,GsendS
          mc=(m-1)*IKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              ikS=1+(i-Imin)+kc+mc
              AsendS(ikS)=A(i,Jstr+m-1,k)
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=ioff+1+(i-Imin)+kc+mc
                AsendS(ikS)=B(i,Jstr+m-1,k)
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=ioff+1+(i-Imin)+kc+mc
                AsendS(ikS)=C(i,Jstr+m-1,k)
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=ioff+1+(i-Imin)+kc+mc
                AsendS(ikS)=D(i,Jstr+m-1,k)
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
        sizeN=0
        DO m=1,GsendN
          mc=(m-1)*IKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              ikN=1+(i-Imin)+kc+mc
              AsendN(ikN)=A(i,Jend-GsendN+m,k)
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=ioff+1+(i-Imin)+kc+mc
                AsendN(ikN)=B(i,Jend-GsendN+m,k)
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=ioff+1+(i-Imin)+kc+mc
                AsendN(ikN)=C(i,Jend-GsendN+m,k)
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=ioff+1+(i-Imin)+kc+mc
                AsendN(ikN)=D(i,Jend-GsendN+m,k)
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Send and receive Southern and Northern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Sexchange) THEN
        CALL mpi_irecv (ArecvS, NSsize, MP_FLOAT, Stile, Ntag,          &
     &                  OCN_COMM_WORLD, Srequest, Serror)
      END IF
      IF (Nexchange) THEN
        CALL mpi_irecv (ArecvN, NSsize, MP_FLOAT, Ntile, Stag,          &
     &                  OCN_COMM_WORLD, Nrequest, Nerror)
      END IF
      IF (Sexchange) THEN
        CALL mpi_send  (AsendS, sizeS, MP_FLOAT, Stile, Stag,           &
     &                  OCN_COMM_WORLD, Serror)
      END IF
      IF (Nexchange) THEN
        CALL mpi_send  (AsendN, sizeN, MP_FLOAT, Ntile, Ntag,           &
     &                  OCN_COMM_WORLD, Nerror)
      END IF
# endif
!
!-----------------------------------------------------------------------
!  Unpack Northern and Southern segments.
!-----------------------------------------------------------------------
!
      IF (Sexchange) THEN
# ifdef MPI
        CALL mpi_wait (Srequest, status(1,2), Serror)
        IF (Serror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Serror, string, LEN(string), Serror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Southern Edge)',       &
     &                      MyRank, Serror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=GrecvS,1,-1
          mc=(GrecvS-m)*IKlen     
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              ikS=1+(i-Imin)+kc+mc
              A(i,Jstr-m,k)=ArecvS(ikS)
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKlen     
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=ioff+1+(i-Imin)+kc+mc
                B(i,Jstr-m,k)=ArecvS(ikS)
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKlen     
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=ioff+1+(i-Imin)+kc+mc
                C(i,Jstr-m,k)=ArecvS(ikS)
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKlen     
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=ioff+1+(i-Imin)+kc+mc
                D(i,Jstr-m,k)=ArecvS(ikS)
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
# ifdef MPI
        CALL mpi_wait (Nrequest, status(1,4), Nerror)
        IF (Nerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Nerror, string, LEN(string), Nerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Northern Edge)',       &
     &                      MyRank, Nerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GrecvN
          mc=(m-1)*IKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              ikN=1+(i-Imin)+kc+mc
              A(i,Jend+m,k)=ArecvN(ikN)
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=ioff+1+(i-Imin)+kc+mc
                B(i,Jend+m,k)=ArecvN(ikN)
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=ioff+1+(i-Imin)+kc+mc
                C(i,Jend+m,k)=ArecvN(ikN)
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=ioff+1+(i-Imin)+kc+mc
                D(i,Jend+m,k)=ArecvN(ikN)
              END DO
            END DO
          END DO
        END IF
      END IF
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 39)
# endif

      RETURN
      END SUBROUTINE mp_exchange3d

      SUBROUTINE mp_exchange4d (ng, model, Nvar,                        &
     &                          Istr, Iend, Jstr, Jend,                 &
     &                          LBi, UBi, LBj, UBj, LBk, UBk, LBt, UBt, &
     &                          Nghost, EWperiodic, NSperiodic,         &
     &                          A, B)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================ Enrique Curchitser ===
!                                                                      !
!  This routine updates the tile overlap halo of  NV 4D variables.     !
!  It exchanges the specified number of "ghost-points".  In  order     !
!  to minimize the number send and receive calls, the ghost-points     !
!  are included in the buffers.  Therefore, the order of the pack,     !
!  send, receive, and unpack is crucial.                               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nvar       Number of variables for aggregated exchanges.         !
!     Istr       Starting tile index in the I-direction.               !
!     Iend       Ending   tile index in the I-direction.               !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     LBk        K-dimension Lower bound.                              !
!     UBk        K-dimension Upper bound.                              !
!     LBt        T-dimension Lower bound.                              !
!     UBt        T-dimension Upper bound.                              !
!     Nghost     Number of ghost-points in the halo region.            !
!     EWperiodic Switch indicating EW periodicity exchanges.           !
!     NSperiodic Switch indicating NS periodicity exchanges.           !
!     A          4D tiled array to process.                            !
!     B          4D tiled array (optional) to process.                 !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Updated 4D tiled array.                               !
!     B          Updated 4D tiled array (optional).                    !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      logical, intent(in) :: EWperiodic, NSperiodic

      integer, intent(in) :: ng, model, Nvar
      integer, intent(in) :: Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk, LBt, UBt
      integer, intent(in) :: Nghost

      real(r8), intent(inout) :: A(LBi:UBi,LBj:UBj,LBk:UBk,LBt:UBt)

      real(r8), intent(inout), optional ::                              &
     &                           B(LBi:UBi,LBj:UBj,LBk:UBk,LBt:UBt)
!
!  Local variable declarations.
!
      logical :: Wexchange, Sexchange, Eexchange, Nexchange
      logical :: processB

      integer :: i, ikS, ikN, ioff, Imin, Imax, Ilen, IKlen, IKTlen
      integer :: j, jkW, jkE, joff, Jmin, Jmax, Jlen, JKlen, JKTlen
      integer :: k, kc,  m, mc, Klen, Lstr, MyRankI, MyRankJ, rank
      integer :: l, lc, Tlen
      integer :: Wtile, GsendW, GrecvW, Wtag, Werror, Wrequest
      integer :: Stile, GsendS, GrecvS, Stag, Serror, Srequest
      integer :: Etile, GsendE, GrecvE, Etag, Eerror, Erequest
      integer :: Ntile, GsendN, GrecvN, Ntag, Nerror, Nrequest
      integer :: EWsize, sizeW, sizeE
      integer :: NSsize, sizeS, sizeN
      integer :: Null_Value

      integer, dimension(-1:NtileI(ng),-1:NtileJ(ng)) :: table
# ifdef MPI
      integer, dimension(MPI_STATUS_SIZE,4) :: status
# endif

# if defined EW_PERIODIC || defined NS_PERIODIC
      integer, parameter :: pp = 1
# else
      integer, parameter :: pp = 0
# endif

      real(r8) :: AsendW(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: AsendE(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: AsendS(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: AsendN(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))

      real(r8) :: ArecvW(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: ArecvE(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: ArecvS(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: ArecvN(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 39)
# endif
!
!-----------------------------------------------------------------------
!  Send and recive tile boundary data.
!-----------------------------------------------------------------------
!
!  Set tile partition table for looking up adjacent processes. Notice
!  that a null value is used in places that data transmition is not
!  required.
!
# if defined MPI
      Null_Value=MPI_PROC_NULL
# else
      Null_Value=-1
# endif
      DO j=-1,NtileJ(ng)
        DO i=-1,NtileI(ng)
          table(i,j)=Null_Value
        END DO
      END DO
      rank=0
      DO j=0,NtileJ(ng)-1
        DO i=0,NtileI(ng)-1
          table(i,j)=rank
          IF (MyRank.eq.rank) THEN
            MyRankI=i
            MyRankJ=j
          END IF
          rank=rank+1            
        END DO
      END DO
!
!  Set boundary communication tags and exchange switches.
!
      Wtag=1
      Stag=2
      Etag=3
      Ntag=4
      Wexchange=.FALSE.
      Sexchange=.FALSE.
      Eexchange=.FALSE.
      Nexchange=.FALSE.
!
!  Set swiches to process additional variables for aggregations.
!
      processB=.FALSE.
      IF (Nvar.gt.1) processB=.TRUE.
!
!-----------------------------------------------------------------------
!  Process Western and Eastern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Jmin=LBj
      Jmax=UBj
      Jlen=Jmax-Jmin+1
      Klen=UBk-LBk+1
      Tlen=UBt-LBt+1
      JKlen=Jlen*Klen
      JKTlen=JKlen*Tlen
      EWsize=Nvar*(Nghost+pp)*JKTlen
!
!  Determine the rank of Western and Eastern tiles.  Then, determine
!  the number of ghost-points to send and receive in the West- and
!  East-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2       
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2   Lm+3
!                      -2    -1     0    1     2      3
!
      IF (EWperiodic) THEN
        IF ((table(MyRankI-1,MyRankJ).eq.Null_Value).and.               &
     &      (NtileI(ng).gt.1)) THEN
          Wtile=table(NtileI(ng)-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
# if defined THREE_GHOST
          GrecvW=Nghost
# else
          GrecvW=Nghost+1
# endif
          GrecvE=Nghost
        ELSE IF ((table(MyRankI+1,MyRankJ).eq.Null_Value).and.          &
     &           (NtileI(ng).gt.1)) THEN
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(0,MyRankJ)
          GsendW=Nghost
# if defined THREE_GHOST
          GsendE=Nghost
# else
          GsendE=Nghost+1
# endif
          GrecvW=Nghost
          GrecvE=Nghost
        ELSE        
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
          GrecvW=Nghost
          GrecvE=Nghost
        END IF
      ELSE
        Wtile=table(MyRankI-1,MyRankJ)
        Etile=table(MyRankI+1,MyRankJ)
        GsendW=Nghost
        GsendE=Nghost
        GrecvW=Nghost
        GrecvE=Nghost
      END IF
      IF (Wtile.ne.Null_Value) Wexchange=.TRUE.
      IF (Etile.ne.Null_Value) Eexchange=.TRUE.
!
!  Pack tile boundary data including ghost-points.
!
      IF (Wexchange) THEN
        sizeW=0
        DO m=1,GsendW
          mc=(m-1)*JKTlen
          DO l=LBt,UBt
           lc=(l-LBt)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=1+(j-Jmin)+kc+lc+mc
                AsendW(jkW)=A(Istr+m-1,j,k,l)
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  sizeW=sizeW+1
                  jkW=joff+1+(j-Jmin)+kc+lc+mc
                  AsendW(jkW)=A(Istr+m-1,j,k,l)
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
        sizeE=0
        DO m=1,GsendE
          mc=(m-1)*JKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=1+(j-Jmin)+kc+lc+mc
                AsendE(jkE)=A(Iend-GsendE+m,j,k,l)
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  sizeE=sizeE+1
                  jkE=joff+1+(j-Jmin)+kc+lc+mc
                  AsendE(jkE)=B(Iend-GsendE+m,j,k,l)
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF        
!
!-----------------------------------------------------------------------
!  Send and receive Western and Eastern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Wexchange) THEN
        CALL mpi_irecv (ArecvW, EWsize, MP_FLOAT, Wtile, Etag,          &
     &                  OCN_COMM_WORLD, Wrequest, Werror)
      END IF
      IF (Eexchange) THEN
        CALL mpi_irecv (ArecvE, EWsize, MP_FLOAT, Etile, Wtag,          &
     &                  OCN_COMM_WORLD, Erequest, Eerror)
      END IF
      IF (Wexchange) THEN
        CALL mpi_send  (AsendW, sizeW, MP_FLOAT, Wtile, Wtag,           &
     &                  OCN_COMM_WORLD, Werror)
      END IF
      IF (Eexchange) THEN
        CALL mpi_send  (AsendE, sizeE, MP_FLOAT, Etile, Etag,           &
     &                  OCN_COMM_WORLD, Eerror)
      END IF
# endif
!
!-----------------------------------------------------------------------
!  Unpack Eastern and Western segments.
!-----------------------------------------------------------------------
!
      IF (Wexchange) THEN
# ifdef MPI
        CALL mpi_wait (Wrequest, status(1,1), Werror)
        IF (Werror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Werror, string, LEN(string), Werror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Western Edge)',        &
     &                      MyRank, Werror, string(1:Lstr)
 10       FORMAT (/,' MP_EXCHANGE4D - error during ',a,                 &
     &            ' call, Node = ',i3.3,' Error = ',i3,/,15x,a)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=GrecvW,1,-1
          mc=(GrecvW-m)*JKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*JKlen      
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=1+(j-Jmin)+kc+lc+mc
                A(Istr-m,j,k,l)=ArecvW(jkW)
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  jkW=joff+1+(j-Jmin)+kc+lc+mc
                  B(Istr-m,j,k,l)=ArecvW(jkW)
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
# ifdef MPI
        CALL mpi_wait (Erequest, status(1,3), Eerror)
        IF (Eerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Eerror, string, LEN(string), Eerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Eastern Edge)',        &
     &                      MyRank, Eerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GrecvE
          mc=(m-1)*JKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=1+(j-Jmin)+kc+lc+mc
                A(Iend+m,j,k,l)=ArecvE(jkE)
              END DO
            END DO
          ENDDO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  jkE=joff+1+(j-Jmin)+kc+lc+mc
                  B(Iend+m,j,k,l)=ArecvE(jkE)
                END DO
              END DO
            ENDDO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Process Southern and Northern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Imin=LBi
      Imax=UBi
      Ilen=Imax-Imin+1
      Klen=UBk-LBk+1
      Tlen=UBt-LBt+1
      IKlen=Ilen*Klen
      IKTlen=IKlen*Tlen
      NSsize=Nvar*(Nghost+pp)*IKTlen
!
!  Determine the rank of Southern and Northern tiles.  Then, determine
!  the number of ghost-points to send and receive in the South- and
!  North-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2  Mm+3
!                      -2    -1     0    1     2     3
!
      IF (NSperiodic) THEN
        IF ((table(MyRankI,MyRankJ-1).eq.Null_Value).and.               &
     &      (NtileJ(ng).gt.1)) THEN
          Stile=table(MyRankI,NtileJ(ng)-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
# if defined THREE_GHOST
          GrecvS=Nghost
# else
          GrecvS=Nghost+1
# endif
          GrecvN=Nghost
        ELSE IF ((table(MyRankI,MyRankJ+1).eq.Null_Value).and.          &
     &           (NtileJ(ng).gt.1)) then
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,0)
          GsendS=Nghost
# if defined THREE_GHOST
          GsendN=Nghost
# else
          GsendN=Nghost+1
# endif
          GrecvS=Nghost
          GrecvN=Nghost
        ELSE
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
          GrecvS=Nghost
          GrecvN=Nghost
        END IF
      ELSE
        Stile=table(MyRankI,MyRankJ-1)
        Ntile=table(MyRankI,MyRankJ+1)
        GsendS=Nghost
        GsendN=Nghost
        GrecvS=Nghost
        GrecvN=Nghost
      END IF
      IF (Stile.ne.Null_Value) Sexchange=.TRUE.
      IF (Ntile.ne.Null_Value) Nexchange=.TRUE.
!
!  Pack tile boundary data including ghost-points.
!
      IF (Sexchange) THEN
        sizeS=0
        DO m=1,GsendS
          mc=(m-1)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=1+(i-Imin)+kc+lc+mc
                AsendS(ikS)=A(i,Jstr+m-1,k,l)
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  sizeS=sizeS+1
                  ikS=ioff+1+(i-Imin)+kc+lc+mc
                  AsendS(ikS)=B(i,Jstr+m-1,k,l)
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
        sizeN=0
        DO m=1,GsendN
          mc=(m-1)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=1+(i-Imin)+kc+lc+mc
                AsendN(ikN)=A(i,Jend-GsendN+m,k,l)
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  sizeN=sizeN+1
                  ikN=ioff+1+(i-Imin)+kc+lc+mc
                  AsendN(ikN)=A(i,Jend-GsendN+m,k,l)
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Send and receive Southern and Northern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Sexchange) THEN
        CALL mpi_irecv (ArecvS, NSsize, MP_FLOAT, Stile, Ntag,          &
     &                  OCN_COMM_WORLD, Srequest, Serror)
      END IF
      IF (Nexchange) THEN
        CALL mpi_irecv (ArecvN, NSsize, MP_FLOAT, Ntile, Stag,          &
     &                  OCN_COMM_WORLD, Nrequest, Nerror)
      END IF
      IF (Sexchange) THEN
        CALL mpi_send  (AsendS, sizeS, MP_FLOAT, Stile, Stag,           &
     &                  OCN_COMM_WORLD, Serror)
      END IF
      IF (Nexchange) THEN
        CALL mpi_send  (AsendN, sizeN, MP_FLOAT, Ntile, Ntag,           &
     &                  OCN_COMM_WORLD, Nerror)
      END IF
# endif
!
!-----------------------------------------------------------------------
!  Unpack Northern and Southern segments.
!-----------------------------------------------------------------------
!
      IF (Sexchange) THEN
# ifdef MPI
        CALL mpi_wait (Srequest, status(1,2), Serror)
        IF (Serror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Serror, string, LEN(string), Serror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Southern Edge)',       &
     &                      MyRank, Serror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=GrecvS,1,-1
          mc=(GrecvS-m)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=1+(i-Imin)+kc+lc+mc
                A(i,Jstr-m,k,l)=ArecvS(ikS)
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  ikS=ioff+1+(i-Imin)+kc+lc+mc
                  B(i,Jstr-m,k,l)=ArecvS(ikS)
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
# ifdef MPI
        CALL mpi_wait (Nrequest, status(1,4), Nerror)
        IF (Nerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Nerror, string, LEN(string), Nerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Northern Edge)',       &
     &                      MyRank, Nerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GrecvN
          mc=(m-1)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=1+(i-Imin)+kc+lc+mc
                A(i,Jend+m,k,l)=ArecvN(ikN)
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  ikN=ioff+1+(i-Imin)+kc+lc+mc
                  B(i,Jend+m,k,l)=ArecvN(ikN)
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 39)
# endif

      RETURN
      END SUBROUTINE mp_exchange4d

      SUBROUTINE ad_mp_exchange2d (ng, model, Nvar,                     &
     &                             Istr, Iend, Jstr, Jend,              &
     &                             LBi, UBi, LBj, UBj,                  &
     &                             Nghost, EWperiodic, NSperiodic,      &
     &                             A, B, C, D)
!
!=================================================== Andrew M. Moore ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  Adjoint Exchange Communications:                                    !
!                                                                      !
!  This routine updates the tile overlap halo of  NV 2D variables.     !
!  It exchanges the specified number of "ghost-points".  In  order     !
!  to minimize the number send and receive calls, the ghost-points     !
!  are included in the buffers.  Therefore, the order of the pack,     !
!  send, receive, and unpack is crucial.                               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nvar       Number of variables for aggregated exchanges.         !
!     Istr       Starting tile index in the I-direction.               !
!     Iend       Ending   tile index in the I-direction.               !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     Nghost     Number of ghost-points in the halo region.            !
!     EWperiodic Switch indicating EW periodicity exchanges.           !
!     NSperiodic Switch indicating NS periodicity exchanges.           !
!     A          2D tiled adjoint array to process.                    !
!     B          2D tiled adjoint array (optional) to process.         !
!     C          2D tiled adjoint array (optional) to process.         !
!     D          2D tiled adjoint array (optional) to process.         !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Updated 2D tiled adjoint array.                       !
!     B          Updated 2D tiled adjoint array (optional).            !
!     C          Updated 2D tiled adjoint array (optional).            !
!     D          Updated 2D tiled adjoint array (optional).            !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      logical, intent(in) :: EWperiodic, NSperiodic

      integer, intent(in) :: ng, model, Nvar
      integer, intent(in) :: Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj
      integer, intent(in) :: Nghost

      real(r8), intent(inout) :: A(LBi:UBi,LBj:UBj)

      real(r8), intent(inout), optional :: B(LBi:UBi,LBj:UBj)
      real(r8), intent(inout), optional :: C(LBi:UBi,LBj:UBj)
      real(r8), intent(inout), optional :: D(LBi:UBi,LBj:UBj)
!
!  Local variable declarations.
!
      logical :: Wexchange, Sexchange, Eexchange, Nexchange
      logical :: processB, processC, processD

      integer :: i, icS, icN, ioff, Imin, Imax, Ilen
      integer :: j, jcW, jcE, joff, Jmin, Jmax, Jlen
      integer :: m, mc, Lstr, MyRankI, MyRankJ, rank
      integer :: Wtile, GsendW, GrecvW, Wtag, Werror, Wrequest
      integer :: Stile, GsendS, GrecvS, Stag, Serror, Srequest
      integer :: Etile, GsendE, GrecvE, Etag, Eerror, Erequest
      integer :: Ntile, GsendN, GrecvN, Ntag, Nerror, Nrequest
      integer :: EWsize, sizeW, sizeE
      integer :: NSsize, sizeS, sizeN
      integer :: BufferSize
      integer :: Null_Value

      integer, dimension(-1:NtileI(ng),-1:NtileJ(ng)) :: table
# ifdef MPI
      integer, dimension(MPI_STATUS_SIZE,4) :: status
# endif

# if defined EW_PERIODIC || defined NS_PERIODIC
      integer, parameter :: pp = 1
# else
      integer, parameter :: pp = 0
# endif

      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendW
      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendE
      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendS
      real(r8), dimension(Nvar*HaloSize(ng)) :: AsendN

      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvW
      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvE
      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvS
      real(r8), dimension(Nvar*HaloSize(ng)) :: ArecvN

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 39)
# endif
!
!-----------------------------------------------------------------------
!  Send and recive tile boundary data.
!-----------------------------------------------------------------------
!
!  Set tile partition table for looking up adjacent processes. Notice
!  that a null value is used in places that data transmition is not
!  required.
!
# if defined MPI
      Null_Value=MPI_PROC_NULL
# else
      Null_Value=-1
# endif
      DO j=-1,NtileJ(ng)
        DO i=-1,NtileI(ng)
          table(i,j)=Null_Value
        END DO
      END DO
      rank=0
      DO j=0,NtileJ(ng)-1
        DO i=0,NtileI(ng)-1
          table(i,j)=rank
          IF (MyRank.eq.rank) THEN
            MyRankI=i
            MyRankJ=j
          END IF
          rank=rank+1            
        END DO
      END DO
!
!  Set boundary communication tags and exchange switches.
!
      Wtag=1
      Stag=2
      Etag=3
      Ntag=4
      Wexchange=.FALSE.
      Sexchange=.FALSE.
      Eexchange=.FALSE.
      Nexchange=.FALSE.
!
!  Set switches to process additional variables for aggregations.
!
      processB=.FALSE.
      processC=.FALSE.
      processD=.FALSE.
      IF (Nvar.gt.1) processB=.TRUE.
      IF (Nvar.gt.2) processC=.TRUE.
      IF (Nvar.gt.3) processD=.TRUE.
!
!-----------------------------------------------------------------------
!  Process Western and Eastern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Jmin=LBj
      Jmax=UBj
      Jlen=Jmax-Jmin+1
      EWsize=Nvar*(Nghost+pp)*Jlen
      BufferSize=Nvar*HaloSize(ng)
!
!  Determine the rank of Western and Eastern tiles.  Then, determine
!  the number of ghost-points to send and receive in the West- and
!  East-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2   Lm+3
!                      -2    -1     0    1     2      3
!
      IF (EWperiodic) THEN
        IF ((table(MyRankI-1,MyRankJ).eq.Null_Value).and.               &
     &      (NtileI(ng).gt.1)) THEN
          Wtile=table(NtileI(ng)-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
# if defined THREE_GHOST
          GrecvW=Nghost
# else
          GrecvW=Nghost+1
# endif
          GrecvE=Nghost
        ELSE IF ((table(MyRankI+1,MyRankJ).eq.Null_Value).and.          &
     &           (NtileI(ng).gt.1)) THEN
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(0,MyRankJ)
          GsendW=Nghost
# if defined THREE_GHOST
          GsendE=Nghost
# else
          GsendE=Nghost+1
# endif
          GrecvW=Nghost
          GrecvE=Nghost
        ELSE
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
          GrecvW=Nghost
          GrecvE=Nghost
        END IF
      ELSE
        Wtile=table(MyRankI-1,MyRankJ)
        Etile=table(MyRankI+1,MyRankJ)
        GsendW=Nghost
        GsendE=Nghost
        GrecvW=Nghost
        GrecvE=Nghost
      END IF
      IF (Wtile.ne.Null_Value) Wexchange=.TRUE.
      IF (Etile.ne.Null_Value) Eexchange=.TRUE.
!
!-----------------------------------------------------------------------
!  Adjoint of unpack Eastern and Western segments.
!-----------------------------------------------------------------------
!
      IF (Eexchange) THEN
        DO i=1,BufferSize
          ArecvE(i)=0.0_r8
          AsendE(i)=0.0_r8
        END DO
        sizeE=0
        DO m=1,GrecvE
          mc=(m-1)*Jlen
          DO j=Jmin,Jmax
            sizeE=sizeE+1
            jcE=1+(j-Jmin)+mc
!>          A(Iend+m,j)=ArecvE(jcE)
!>
            ArecvE(jcE)=A(Iend+m,j)
            A(Iend+m,j)=0.0_r8
          ENDDO
        END DO
        IF (processB) THEN
          joff=jcE
          DO m=1,GrecvE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jcE=joff+1+(j-Jmin)+mc
!>            B(Iend+m,j)=ArecvE(jcE)
!>
              ArecvE(jcE)=B(Iend+m,j)
              B(Iend+m,j)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jcE
          DO m=1,GrecvE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jcE=joff+1+(j-Jmin)+mc
!>            C(Iend+m,j)=ArecvE(jcE)
!>
              ArecvE(jcE)=C(Iend+m,j)
              C(Iend+m,j)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jcE
          DO m=1,GrecvE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jcE=joff+1+(j-Jmin)+mc
!>            D(Iend+m,j)=ArecvE(jcE)
!>
              ArecvE(jcE)=D(Iend+m,j)
              D(Iend+m,j)=0.0_r8
            END DO
          END DO
        END IF
      END IF
!
      IF (Wexchange) THEN
        DO i=1,BufferSize
          ArecvW(i)=0.0_r8
          AsendW(i)=0.0_r8
        END DO
        sizeW=0
        DO m=GrecvW,1,-1
          mc=(GrecvW-m)*Jlen
          DO j=Jmin,Jmax
            sizeW=sizeW+1
            jcW=1+(j-Jmin)+mc
!>          A(Istr-m,j)=ArecvW(jcW)
!>
            ArecvW(jcW)=A(Istr-m,j)
            A(Istr-m,j)=0.0_r8
          END DO
        END DO
        IF (processB) THEN
          joff=jcW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jcW=joff+1+(j-Jmin)+mc
!>            B(Istr-m,j)=ArecvW(jcW)
!>
              ArecvW(jcW)=B(Istr-m,j)
              B(Istr-m,j)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jcW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jcW=joff+1+(j-Jmin)+mc
!>            C(Istr-m,j)=ArecvW(jcW)
!>
              ArecvW(jcW)=C(Istr-m,j)
              C(Istr-m,j)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jcW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jcW=joff+1+(j-Jmin)+mc
!>            D(Istr-m,j)=ArecvW(jcW)
!>
              ArecvW(jcW)=D(Istr-m,j)
              D(Istr-m,j)=0.0_r8
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Send and receive Western and Eastern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Wexchange) THEN
!>      CALL mpi_irecv (ArecvW, EWsize, MP_FLOAT, Wtile, Etag,          &
!>   &                  OCN_COMM_WORLD, Wrequest, Werror)
!>
        CALL mpi_irecv (AsendW, EWsize, MP_FLOAT, Wtile, Etag,          &
     &                  OCN_COMM_WORLD, Wrequest, Werror)
      END IF
      IF (Eexchange) THEN
!>      CALL mpi_irecv (ArecvE, EWsize, MP_FLOAT, Etile, Wtag,          &
!>   &                  OCN_COMM_WORLD, Erequest, Eerror)
!>
        CALL mpi_irecv (AsendE, EWsize, MP_FLOAT, Etile, Wtag,          &
     &                  OCN_COMM_WORLD, Erequest, Eerror)
      END IF
      IF (Wexchange) THEN
!>      CALL mpi_send  (AsendW, sizeW, MP_FLOAT, Wtile, Wtag,           &
!>   &                  OCN_COMM_WORLD, Werror)
!>
        CALL mpi_send  (ArecvW, sizeW, MP_FLOAT, Wtile, Wtag,           &
     &                  OCN_COMM_WORLD, Werror)
      END IF
      IF (Eexchange) THEN
!>      CALL mpi_send  (AsendE, sizeE, MP_FLOAT, Etile, Etag,           &
!>   &                  OCN_COMM_WORLD, Eerror)
!>
        CALL mpi_send  (ArecvE, sizeE, MP_FLOAT, Etile, Etag,           &
     &                  OCN_COMM_WORLD, Eerror)
      END IF
# endif
!
!  Adjoint of packing tile boundary data including ghost-points.
!
      IF (Wexchange) THEN
# ifdef MPI
        CALL mpi_wait (Wrequest, status(1,1), Werror)
        IF (Werror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Werror, string, LEN(string), Werror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Western Edge)',        &
     &                      MyRank, Werror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendW
          mc=(m-1)*Jlen
          DO j=Jmin,Jmax
            jcW=1+(j-Jmin)+mc
!>          AsendW(jcW)=A(Istr+m-1,j)
!>
            A(Istr+m-1,j)=A(Istr+m-1,j)+AsendW(jcW)
            AsendW(jcW)=0.0_r8
          END DO
        END DO
        IF (processB) THEN
          joff=jcW
          DO m=1,GsendW
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcW=joff+1+(j-Jmin)+mc
!>            AsendW(jcW)=B(Istr+m-1,j)
!>
              B(Istr+m-1,j)=B(Istr+m-1,j)+AsendW(jcW)
              AsendW(jcW)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jcW
          DO m=1,GsendW
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcW=joff+1+(j-Jmin)+mc
!>            AsendW(jcW)=C(Istr+m-1,j)
!>
              C(Istr+m-1,j)=C(Istr+m-1,j)+AsendW(jcW)
              AsendW(jcW)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jcW
          DO m=1,GsendW
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcW=joff+1+(j-Jmin)+mc
!>            AsendW(jcW)=D(Istr+m-1,j)
!>
              D(Istr+m-1,j)=D(Istr+m-1,j)+AsendW(jcW)
              AsendW(jcW)=0.0_r8
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
# ifdef MPI
        CALL mpi_wait (Erequest, status(1,3), Eerror)
        IF (Eerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Eerror, string, LEN(string), Eerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Eastern Edge)',        &
     &                      MyRank, Eerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendE
          mc=(m-1)*Jlen
          DO j=Jmin,Jmax
            jcE=1+(j-Jmin)+mc
!>          AsendE(jcE)=A(Iend-GsendE+m,j)
!>
            A(Iend-GsendE+m,j)=A(Iend-GsendE+m,j)+AsendE(jcE)
            AsendE(jcE)=0.0_r8
          END DO
        END DO
        IF (processB) THEN
          joff=jcE
          DO m=1,GsendE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcE=joff+1+(j-Jmin)+mc
!>            AsendE(jcE)=B(Iend-GsendE+m,j)
!>
              B(Iend-GsendE+m,j)=B(Iend-GsendE+m,j)+AsendE(jcE)
              AsendE(jcE)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jcE
          DO m=1,GsendE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcE=joff+1+(j-Jmin)+mc
!>            AsendE(jcE)=C(Iend-GsendE+m,j)
!>
              C(Iend-GsendE+m,j)=C(Iend-GsendE+m,j)+AsendE(jcE)
              AsendE(jcE)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jcE
          DO m=1,GsendE
            mc=(m-1)*Jlen
            DO j=Jmin,Jmax
              jcE=joff+1+(j-Jmin)+mc
!>            AsendE(jcE)=D(Iend-GsendE+m,j)
!>
              D(Iend-GsendE+m,j)=D(Iend-GsendE+m,j)+AsendE(jcE)
              AsendE(jcE)=0.0_r8
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Process Southern and Northern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Imin=LBi
      Imax=UBi
      Ilen=Imax-Imin+1
      NSsize=Nvar*(Nghost+pp)*Ilen
!
!  Determine the rank of Southern and Northern tiles.  Then, determine
!  the number of ghost-points to send and receive in the South- and
!  North-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2  Mm+3
!                      -2    -1     0    1     2     3
!
      IF (NSperiodic) THEN
        IF ((table(MyRankI,MyRankJ-1).eq.Null_Value).and.               &
     &      (NtileJ(ng).gt.1)) THEN
          Stile=table(MyRankI,NtileJ(ng)-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
# if defined THREE_GHOST
          GrecvS=Nghost
# else
          GrecvS=Nghost+1
# endif
          GrecvN=Nghost
        ELSE IF ((table(MyRankI,MyRankJ+1).eq.Null_Value).and.          &
     &           (NtileJ(ng).gt.1)) then
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,0)
          GsendS=Nghost
# if defined THREE_GHOST
          GsendN=Nghost
# else
          GsendN=Nghost+1
# endif
          GrecvS=Nghost
          GrecvN=Nghost
        ELSE
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
          GrecvS=Nghost
          GrecvN=Nghost
        END IF
      ELSE
        Stile=table(MyRankI,MyRankJ-1)
        Ntile=table(MyRankI,MyRankJ+1)
        GsendS=Nghost
        GsendN=Nghost
        GrecvS=Nghost
        GrecvN=Nghost
      END IF
      IF (Stile.ne.Null_Value) Sexchange=.TRUE.
      IF (Ntile.ne.Null_Value) Nexchange=.TRUE.
!
!-----------------------------------------------------------------------
!  Adjoint of unpacking Northern and Southern segments.
!-----------------------------------------------------------------------
!
      IF (Nexchange) THEN
        DO i=1,BufferSize
          ArecvN(i)=0.0_r8
          AsendN(i)=0.0_r8
        END DO
        sizeN=0
        DO m=1,GrecvN
          mc=(m-1)*Ilen
          DO i=Imin,Imax
            sizeN=sizeN+1
            icN=1+(i-Imin)+mc
!>          A(i,Jend+m)=ArecvN(icN)
!>
            ArecvN(icN)=A(i,Jend+m)
            A(i,Jend+m)=0.0_r8
          END DO
        END DO
        IF (processB) THEN
          ioff=icN
          DO m=1,GrecvN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              icN=ioff+1+(i-Imin)+mc
!>            B(i,Jend+m)=ArecvN(icN)
!>
              ArecvN(icN)=B(i,Jend+m)
              B(i,Jend+m)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icN
          DO m=1,GrecvN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              icN=ioff+1+(i-Imin)+mc
!>            C(i,Jend+m)=ArecvN(icN)
!>
              ArecvN(icN)=C(i,Jend+m)
              C(i,Jend+m)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icN
          DO m=1,GrecvN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              icN=ioff+1+(i-Imin)+mc
!>            D(i,Jend+m)=ArecvN(icN)
!>
              ArecvN(icN)=D(i,Jend+m)
              D(i,Jend+m)=0.0_r8
            END DO
          END DO
        END IF
      END IF
!
      IF (Sexchange) THEN
        DO i=1,BufferSize
          ArecvS(i)=0.0_r8
          AsendS(i)=0.0_r8
        END DO
        sizeS=0
        DO m=GrecvS,1,-1
          mc=(GrecvS-m)*Ilen
          DO i=Imin,Imax
            sizeS=sizeS+1
            icS=1+(i-Imin)+mc
!>          A(i,Jstr-m)=ArecvS(icS)
!>
            ArecvS(icS)=A(i,Jstr-m)
            A(i,Jstr-m)=0.0_r8
          END DO
        END DO
        IF (processB) THEN
          ioff=icS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              icS=ioff+1+(i-Imin)+mc
!>            B(i,Jstr-m)=ArecvS(icS)
!>
              ArecvS(icS)=B(i,Jstr-m)
              B(i,Jstr-m)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              icS=ioff+1+(i-Imin)+mc
!>            C(i,Jstr-m)=ArecvS(icS)
!>
              ArecvS(icS)=C(i,Jstr-m)
              C(i,Jstr-m)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              icS=ioff+1+(i-Imin)+mc
!>            D(i,Jstr-m)=ArecvS(icS)
!>
              ArecvS(icS)=D(i,Jstr-m)
              D(i,Jstr-m)=0.0_r8
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Adjoint of send and receive Southern and Northern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Sexchange) THEN
!>      CALL mpi_irecv (ArecvS, NSsize, MP_FLOAT, Stile, Ntag,          &
!>   &                  OCN_COMM_WORLD, Srequest, Serror)
!>
        CALL mpi_irecv (AsendS, NSsize, MP_FLOAT, Stile, Ntag,          &
     &                  OCN_COMM_WORLD, Srequest, Serror)
      END IF
      IF (Nexchange) THEN
!>      CALL mpi_irecv (ArecvN, NSsize, MP_FLOAT, Ntile, Stag,          &
!>   &                  OCN_COMM_WORLD, Nrequest, Nerror)
!>
        CALL mpi_irecv (AsendN, NSsize, MP_FLOAT, Ntile, Stag,          &
     &                  OCN_COMM_WORLD, Nrequest, Nerror)
      END IF
      IF (Sexchange) THEN
!>      CALL mpi_send  (AsendS, sizeS, MP_FLOAT, Stile, Stag,           &
!>   &                  OCN_COMM_WORLD, Serror)
!>
        CALL mpi_send  (ArecvS, sizeS, MP_FLOAT, Stile, Stag,           &
     &                  OCN_COMM_WORLD, Serror)
      END IF
      IF (Nexchange) THEN
!>      CALL mpi_send  (AsendN, sizeN, MP_FLOAT, Ntile, Ntag,           &
!>   &                  OCN_COMM_WORLD, Nerror)
!>
        CALL mpi_send  (ArecvN, sizeN, MP_FLOAT, Ntile, Ntag,           &
     &                  OCN_COMM_WORLD, Nerror)
      END IF
# endif
!
!  Adjoint of packing tile boundary data including ghost-points.
!
      IF (Sexchange) THEN            
# ifdef MPI
        CALL mpi_wait (Srequest, status(1,2), Serror)
        IF (Serror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Serror, string, LEN(string), Serror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Southern Edge)',       &
     &                      MyRank, Serror, string(1:Lstr)
 10       FORMAT (/,' AD_MP_EXCHANGE2D - error during ',a,' call,',     &
     &            ' Node = ', i3.3,' Error = ',i3,/,18x,a)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendS
          mc=(m-1)*Ilen
          DO i=Imin,Imax
            icS=1+(i-Imin)+mc
!>          AsendS(icS)=A(i,Jstr+m-1)
!>
            A(i,Jstr+m-1)=A(i,Jstr+m-1)+AsendS(icS)
            AsendS(icS)=0.0_r8
          END DO
        END DO
        IF (processB) THEN
          ioff=icS
          DO m=1,GsendS
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icS=ioff+1+(i-Imin)+mc
!>            AsendS(icS)=B(i,Jstr+m-1)
!>
              B(i,Jstr+m-1)=B(i,Jstr+m-1)+AsendS(icS)
              AsendS(icS)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icS
          DO m=1,GsendS
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icS=ioff+1+(i-Imin)+mc
!>            AsendS(icS)=C(i,Jstr+m-1)
!>
              C(i,Jstr+m-1)=C(i,Jstr+m-1)+AsendS(icS)
              AsendS(icS)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icS
          DO m=1,GsendS
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icS=ioff+1+(i-Imin)+mc
!>            AsendS(icS)=D(i,Jstr+m-1)
!>
              D(i,Jstr+m-1)=D(i,Jstr+m-1)+AsendS(icS)
              AsendS(icS)=0.0_r8
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
# ifdef MPI
        CALL mpi_wait (Nrequest, status(1,4), Nerror)
        IF (Nerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Nerror, string, LEN(string), Nerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Northern Edge)',       &
     &                      MyRank, Nerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendN
          mc=(m-1)*Ilen
          DO i=Imin,Imax
            icN=1+(i-Imin)+mc
!>          AsendN(icN)=A(i,Jend-GsendN+m)
!>
            A(i,Jend-GsendN+m)=A(i,Jend-GsendN+m)+AsendN(icN)
            AsendN(icN)=0.0_r8
          END DO
        END DO
        IF (processB) THEN
          ioff=icN
          DO m=1,GsendN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icN=ioff+1+(i-Imin)+mc
!>            AsendN(icN)=B(i,Jend-GsendN+m)
!>
              B(i,Jend-GsendN+m)=B(i,Jend-GsendN+m)+AsendN(icN)
              AsendN(icN)=0.0_r8
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=icN
          DO m=1,GsendN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icN=ioff+1+(i-Imin)+mc
!>            AsendN(icN)=C(i,Jend-GsendN+m)
!>
              C(i,Jend-GsendN+m)=C(i,Jend-GsendN+m)+AsendN(icN)
              AsendN(icN)=0.0_r8
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=icN
          DO m=1,GsendN
            mc=(m-1)*Ilen
            DO i=Imin,Imax
              icN=ioff+1+(i-Imin)+mc
!>            AsendN(icN)=D(i,Jend-GsendN+m)
!>
              D(i,Jend-GsendN+m)=D(i,Jend-GsendN+m)+AsendN(icN)
              AsendN(icN)=0.0_r8
            END DO
          END DO
        END IF
      END IF
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 39)
# endif

      RETURN
      END SUBROUTINE ad_mp_exchange2d

      SUBROUTINE ad_mp_exchange3d (ng, model, Nvar,                     &
     &                             Istr, Iend, Jstr, Jend,              &
     &                             LBi, UBi, LBj, UBj, LBk, UBk,        &
     &                             Nghost, EWperiodic, NSperiodic,      &
     &                             A, B, C, D)
!
!=================================================== Andrew M. Moore ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  Adjoint Exchange Communications:                                    !
!                                                                      !
!  This routine updates the tile overlap halo of  NV 3D variables.     !
!  It exchanges the specified number of "ghost-points".  In  order     !
!  to minimize the number send and receive calls, the ghost-points     !
!  are included in the buffers.  Therefore, the order of the pack,     !
!  send, receive, and unpack is crucial.                               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nvar       Number of variables for aggregated exchanges.         !
!     Istr       Starting tile index in the I-direction.               !
!     Iend       Ending   tile index in the I-direction.               !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     LBk        K-dimension Lower bound.                              !
!     UBk        K-dimension Upper bound.                              !
!     Nghost     Number of ghost-points in the halo region.            !
!     EWperiodic Switch indicating EW periodicity exchanges.           !
!     NSperiodic Switch indicating NS periodicity exchanges.           !
!     A          3d tiled adjoint array to process.                    !
!     B          3d tiled adjoint array (optional) to process.         !
!     C          3d tiled adjoint array (optional) to process.         !
!     D          3d tiled adjoint array (optional) to process.         !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Updated 3D tiled adjoint array.                       !
!     B          Updated 3D tiled adjoint array (optional).            !
!     C          Updated 3D tiled adjoint array (optional).            !
!     D          Updated 3D tiled adjoint array (optional).            !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      logical, intent(in) :: EWperiodic, NSperiodic

      integer, intent(in) :: ng, model, Nvar
      integer, intent(in) :: Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk
      integer, intent(in) :: Nghost

      real(r8), intent(inout) :: A(LBi:UBi,LBj:UBj,LBk:UBk)

      real(r8), intent(inout), optional :: B(LBi:UBi,LBj:UBj,LBk:UBk)
      real(r8), intent(inout), optional :: C(LBi:UBi,LBj:UBj,LBk:UBk)
      real(r8), intent(inout), optional :: D(LBi:UBi,LBj:UBj,LBk:UBk)
!
!  Local variable declarations.
!
      logical :: Wexchange, Sexchange, Eexchange, Nexchange
      logical :: processB, processC, processD

      integer :: i, ikS, ikN, ioff, Imin, Imax, Ilen, IKlen
      integer :: j, jkW, jkE, joff, Jmin, Jmax, Jlen, JKlen
      integer :: k, kc,  m, mc, Klen, Lstr, MyRankI, MyRankJ, rank
      integer :: Wtile, GsendW, GrecvW, Wtag, Werror, Wrequest
      integer :: Stile, GsendS, GrecvS, Stag, Serror, Srequest
      integer :: Etile, GsendE, GrecvE, Etag, Eerror, Erequest
      integer :: Ntile, GsendN, GrecvN, Ntag, Nerror, Nrequest
      integer :: EWsize, sizeW, sizeE
      integer :: NSsize, sizeS, sizeN
      integer :: BufferSize
      integer :: Null_Value

      integer, dimension(-1:NtileI(ng),-1:NtileJ(ng)) :: table
# ifdef MPI
      integer, dimension(MPI_STATUS_SIZE,4) :: status
# endif

# if defined EW_PERIODIC || defined NS_PERIODIC
      integer, parameter :: pp = 1
# else
      integer, parameter :: pp = 0
# endif

      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendW
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendE
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendS
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: AsendN

      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvW
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvE
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvS
      real(r8), dimension(Nvar*HaloSize(ng)*(UBk-LBk+1)) :: ArecvN

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 39)
# endif
!
!-----------------------------------------------------------------------
!  Send and recive tile boundary data.
!-----------------------------------------------------------------------
!
!  Set tile partition table for looking up adjacent processes. Notice
!  that a null value is used in places that data transmition is not
!  required.
!
# if defined MPI
      Null_Value=MPI_PROC_NULL
# else
      Null_Value=-1
# endif
      DO j=-1,NtileJ(ng)
        DO i=-1,NtileI(ng)
          table(i,j)=Null_Value
        END DO
      END DO
      rank=0
      DO j=0,NtileJ(ng)-1
        DO i=0,NtileI(ng)-1
          table(i,j)=rank
          IF (MyRank.eq.rank) THEN
            MyRankI=i
            MyRankJ=j
          END IF
          rank=rank+1            
        END DO
      END DO
!
!  Set boundary communication tags and exchange switches.
!
      Wtag=1
      Stag=2
      Etag=3
      Ntag=4
      Wexchange=.FALSE.
      Sexchange=.FALSE.
      Eexchange=.FALSE.
      Nexchange=.FALSE.
!
!  Set switches to process additional variables for aggregations.
!
      processB=.FALSE.
      processC=.FALSE.
      processD=.FALSE.
      IF (Nvar.gt.1) processB=.TRUE.
      IF (Nvar.gt.2) processC=.TRUE.
      IF (Nvar.gt.3) processD=.TRUE.
!
!-----------------------------------------------------------------------
!  Process Western and Eastern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Jmin=LBj
      Jmax=UBj
      Jlen=Jmax-Jmin+1
      Klen=UBk-LBk+1
      JKlen=Jlen*Klen
      EWsize=Nvar*(Nghost+pp)*JKlen
      BufferSize=Nvar*HaloSize(ng)*Klen
!
!  Determine the rank of Western and Eastern tiles.  Then, determine
!  the number of ghost-points to send and receive in the West- and
!  East-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2   Lm+3
!                      -2    -1     0    1     2      3
!
      IF (EWperiodic) THEN
        IF ((table(MyRankI-1,MyRankJ).eq.Null_Value).and.               &
     &      (NtileI(ng).gt.1)) THEN
          Wtile=table(NtileI(ng)-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
# if defined THREE_GHOST
          GrecvW=Nghost
# else
          GrecvW=Nghost+1
# endif
          GrecvE=Nghost
        ELSE IF ((table(MyRankI+1,MyRankJ).eq.Null_Value).and.          &
     &           (NtileI(ng).gt.1)) THEN
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(0,MyRankJ)
          GsendW=Nghost
# if defined THREE_GHOST
          GsendE=Nghost
# else
          GsendE=Nghost+1
# endif
          GrecvW=Nghost
          GrecvE=Nghost
        ELSE
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
          GrecvW=Nghost
          GrecvE=Nghost
        END IF
      ELSE
        Wtile=table(MyRankI-1,MyRankJ)
        Etile=table(MyRankI+1,MyRankJ)
        GsendW=Nghost
        GsendE=Nghost
        GrecvW=Nghost
        GrecvE=Nghost
      END IF
      IF (Wtile.ne.Null_Value) Wexchange=.TRUE.
      IF (Etile.ne.Null_Value) Eexchange=.TRUE.
!
!-----------------------------------------------------------------------
!  Adjoint of unpack Eastern and Western segments.
!-----------------------------------------------------------------------
!
      IF (Eexchange) THEN
        DO i=1,BufferSize
          ArecvE(i)=0.0_r8
          AsendE(i)=0.0_r8
        END DO
        sizeE=0
        DO m=1,GrecvE
          mc=(m-1)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              sizeE=sizeE+1
              jkE=1+(j-Jmin)+kc+mc
!>            A(Iend+m,j,k)=ArecvE(jkE)
!>
              ArecvE(jkE)=A(Iend+m,j,k)
              A(Iend+m,j,k)=0.0_r8
            END DO
          ENDDO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=joff+1+(j-Jmin)+kc+mc
!>              B(Iend+m,j,k)=ArecvE(jkE)
!>
                ArecvE(jkE)=B(Iend+m,j,k)
                B(Iend+m,j,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=joff+1+(j-Jmin)+kc+mc
!>              C(Iend+m,j,k)=ArecvE(jkE)
!>
                ArecvE(jkE)=C(Iend+m,j,k)
                C(Iend+m,j,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=joff+1+(j-Jmin)+kc+mc
!>              D(Iend+m,j,k)=ArecvE(jkE)
!>
                ArecvE(jkE)=D(Iend+m,j,k)
                D(Iend+m,j,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Wexchange) THEN
        DO i=1,BufferSize
          ArecvW(i)=0.0_r8
          AsendW(i)=0.0_r8
        END DO
        sizeW=0
        DO m=GrecvW,1,-1
          mc=(GrecvW-m)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              sizeW=sizeW+1
              jkW=1+(j-Jmin)+kc+mc
!>            A(Istr-m,j,k)=ArecvW(jkW)
!>
              ArecvW(jkW)=A(Istr-m,j,k)
              A(Istr-m,j,k)=0.0_r8
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=joff+1+(j-Jmin)+kc+mc
!>              B(Istr-m,j,k)=ArecvW(jkW)
!>
                ArecvW(jkW)=B(Istr-m,j,k)
                B(Istr-m,j,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=joff+1+(j-Jmin)+kc+mc
!>              C(Istr-m,j,k)=ArecvW(jkW)
!>
                ArecvW(jkW)=C(Istr-m,j,k)
                C(Istr-m,j,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=joff+1+(j-Jmin)+kc+mc
!>              D(Istr-m,j,k)=ArecvW(jkW)
!>
                ArecvW(jkW)=D(Istr-m,j,k)
                D(Istr-m,j,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Send and receive Western and Eastern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Wexchange) THEN
!>      CALL mpi_irecv (ArecvW, EWsize, MP_FLOAT, Wtile, Etag,          &
!>   &                  OCN_COMM_WORLD, Wrequest, Werror)
!>
        CALL mpi_irecv (AsendW, EWsize, MP_FLOAT, Wtile, Etag,          &
     &                  OCN_COMM_WORLD, Wrequest, Werror)
      END IF
      IF (Eexchange) THEN
!>      CALL mpi_irecv (ArecvE, EWsize, MP_FLOAT, Etile, Wtag,          &
!>   &                  OCN_COMM_WORLD, Erequest, Eerror)
!>
        CALL mpi_irecv (AsendE, EWsize, MP_FLOAT, Etile, Wtag,          &
     &                  OCN_COMM_WORLD, Erequest, Eerror)
      END IF
      IF (Wexchange) THEN
!>      CALL mpi_send  (AsendW, sizeW, MP_FLOAT, Wtile, Wtag,           &
!>   &                  OCN_COMM_WORLD, Werror)
!>
        CALL mpi_send  (ArecvW, sizeW, MP_FLOAT, Wtile, Wtag,           &
     &                  OCN_COMM_WORLD, Werror)
      END IF
      IF (Eexchange) THEN
!>      CALL mpi_send  (AsendE, sizeE, MP_FLOAT, Etile, Etag,           &
!>   &                  OCN_COMM_WORLD, Eerror)
!>
        CALL mpi_send  (ArecvE, sizeE, MP_FLOAT, Etile, Etag,           &
     &                  OCN_COMM_WORLD, Eerror)
      END IF
# endif
!
!  Adjoint of packing tile boundary data including ghost-points.
!
      IF (Wexchange) THEN
# ifdef MPI
        CALL mpi_wait (Wrequest, status(1,1), Werror)
        IF (Werror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Werror, string, LEN(string), Werror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Western Edge)',        &
     &                      MyRank, Werror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendW
          mc=(m-1)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              jkW=1+(j-Jmin)+kc+mc
!>            AsendW(jkW)=A(Istr+m-1,j,k)
!>
              A(Istr+m-1,j,k)=A(Istr+m-1,j,k)+AsendW(jkW)
              AsendW(jkW)=0.0_r8
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=joff+1+(j-Jmin)+kc+mc
!>              AsendW(jkW)=B(Istr+m-1,j,k)
!>
                B(Istr+m-1,j,k)=B(Istr+m-1,j,k)+AsendW(jkW)
                AsendW(jkW)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=joff+1+(j-Jmin)+kc+mc
!>              AsendW(jkW)=C(Istr+m-1,j,k)
!>
                C(Istr+m-1,j,k)=C(Istr+m-1,j,k)+AsendW(jkW)
                AsendW(jkW)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=joff+1+(j-Jmin)+kc+mc
!>              AsendW(jkW)=D(Istr+m-1,j,k)
!>
                D(Istr+m-1,j,k)=D(Istr+m-1,j,k)+AsendW(jkW)
                AsendW(jkW)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
# ifdef MPI
        CALL mpi_wait (Erequest, status(1,3), Eerror)
        IF (Eerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Eerror, string, LEN(string), Eerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Eastern Edge)',        &
     &                      MyRank, Eerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendE
          mc=(m-1)*JKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Jlen
            DO j=Jmin,Jmax
              jkE=1+(j-Jmin)+kc+mc
!>            AsendE(jkE)=A(Iend-GsendE+m,j,k)
!>
              A(Iend-GsendE+m,j,k)=A(Iend-GsendE+m,j,k)+AsendE(jkE)
              AsendE(jkE)=0.0_r8
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=joff+1+(j-Jmin)+kc+mc
!>              AsendE(jkE)=B(Iend-GsendE+m,j,k)
!>
                B(Iend-GsendE+m,j,k)=B(Iend-GsendE+m,j,k)+AsendE(jkE)
                AsendE(jkE)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=joff+1+(j-Jmin)+kc+mc
!>              AsendE(jkE)=C(Iend-GsendE+m,j,k)
!>
                C(Iend-GsendE+m,j,k)=C(Iend-GsendE+m,j,k)+AsendE(jkE)
                AsendE(jkE)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=joff+1+(j-Jmin)+kc+mc
!>              AsendE(jkE)=D(Iend-GsendE+m,j,k)
!>
                D(Iend-GsendE+m,j,k)=D(Iend-GsendE+m,j,k)+AsendE(jkE)
                AsendE(jkE)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Process Southern and Northern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Imin=LBi
      Imax=UBi
      Ilen=Imax-Imin+1
      Klen=UBk-LBk+1
      IKlen=Ilen*Klen
      NSsize=Nvar*(Nghost+pp)*IKlen
!
!  Determine the rank of Southern and Northern tiles.  Then, determine
!  the number of ghost-points to send and receive in the South- and
!  North-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2  Mm+3
!                      -2    -1     0    1     2     3
!
      IF (NSperiodic) THEN
        IF ((table(MyRankI,MyRankJ-1).eq.Null_Value).and.               &
     &      (NtileJ(ng).gt.1)) THEN
          Stile=table(MyRankI,NtileJ(ng)-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
# if defined THREE_GHOST
          GrecvS=Nghost
# else
          GrecvS=Nghost+1
# endif
          GrecvN=Nghost
        ELSE IF ((table(MyRankI,MyRankJ+1).eq.Null_Value).and.          &
     &           (NtileJ(ng).gt.1)) then
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,0)
          GsendS=Nghost
# if defined THREE_GHOST
          GsendN=Nghost
# else
          GsendN=Nghost+1
# endif
          GrecvS=Nghost
          GrecvN=Nghost
        ELSE
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
          GrecvS=Nghost
          GrecvN=Nghost
        END IF
      ELSE
        Stile=table(MyRankI,MyRankJ-1)
        Ntile=table(MyRankI,MyRankJ+1)
        GsendS=Nghost
        GsendN=Nghost
        GrecvS=Nghost
        GrecvN=Nghost
      END IF
      IF (Stile.ne.Null_Value) Sexchange=.TRUE.
      IF (Ntile.ne.Null_Value) Nexchange=.TRUE.
!
!-----------------------------------------------------------------------
!  Adjoint of unpacking Northern and Southern segments.
!-----------------------------------------------------------------------
!
      IF (Nexchange) THEN
        DO i=1,BufferSize
          ArecvN(i)=0.0_r8
          AsendN(i)=0.0_r8
        END DO
        sizeN=0
        DO m=1,GrecvN
          mc=(m-1)*IKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              sizeN=sizeN+1
              ikN=1+(i-Imin)+kc+mc
!>            A(i,Jend+m,k)=ArecvN(ikN)
!>
              ArecvN(ikN)=A(i,Jend+m,k)
              A(i,Jend+m,k)=0.0_r8
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=ioff+1+(i-Imin)+kc+mc
!>              B(i,Jend+m,k)=ArecvN(ikN)
!>
                ArecvN(ikN)=B(i,Jend+m,k)
                B(i,Jend+m,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=ioff+1+(i-Imin)+kc+mc
!>              C(i,Jend+m,k)=ArecvN(ikN)
!>
                ArecvN(ikN)=C(i,Jend+m,k)
                C(i,Jend+m,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=ioff+1+(i-Imin)+kc+mc
!>              D(i,Jend+m,k)=ArecvN(ikN)
!>
                ArecvN(ikN)=D(i,Jend+m,k)
                D(i,Jend+m,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Sexchange) THEN
        DO i=1,BufferSize
          ArecvS(i)=0.0_r8
          AsendS(i)=0.0_r8
        END DO
        sizeS=0
        DO m=GrecvS,1,-1
          mc=(GrecvS-m)*IKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              sizeS=sizeS+1
              ikS=1+(i-Imin)+kc+mc
!>            A(i,Jstr-m,k)=ArecvS(ikS)
!>
              ArecvS(ikS)=A(i,Jstr-m,k)
              A(i,Jstr-m,k)=0.0_r8
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=ioff+1+(i-Imin)+kc+mc
!>              B(i,Jstr-m,k)=ArecvS(ikS)
!>
                ArecvS(ikS)=B(i,Jstr-m,k)
                B(i,Jstr-m,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=ioff+1+(i-Imin)+kc+mc
!>              C(i,Jstr-m,k)=ArecvS(ikS)
!>
                ArecvS(ikS)=C(i,Jstr-m,k)
                C(i,Jstr-m,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=ioff+1+(i-Imin)+kc+mc
!>              D(i,Jstr-m,k)=ArecvS(ikS)
!>
                ArecvS(ikS)=D(i,Jstr-m,k)
                D(i,Jstr-m,k)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Adjoint of send and receive Southern and Northern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Sexchange) THEN
!>      CALL mpi_irecv (ArecvS, NSsize, MP_FLOAT, Stile, Ntag,          &
!>   &                  OCN_COMM_WORLD, Srequest, Serror)
!>
        CALL mpi_irecv (AsendS, NSsize, MP_FLOAT, Stile, Ntag,          &
     &                  OCN_COMM_WORLD, Srequest, Serror)
      END IF
      IF (Nexchange) THEN
!>      CALL mpi_irecv (ArecvN, NSsize, MP_FLOAT, Ntile, Stag,          &
!>   &                  OCN_COMM_WORLD, Nrequest, Nerror)
!>
        CALL mpi_irecv (AsendN, NSsize, MP_FLOAT, Ntile, Stag,          &
     &                  OCN_COMM_WORLD, Nrequest, Nerror)
      END IF
      IF (Sexchange) THEN
!>      CALL mpi_send  (AsendS, sizeS, MP_FLOAT, Stile, Stag,           &
!>   &                  OCN_COMM_WORLD, Serror)
!>
        CALL mpi_send  (ArecvS, sizeS, MP_FLOAT, Stile, Stag,           &
     &                  OCN_COMM_WORLD, Serror)
      END IF
      IF (Nexchange) THEN
!>      CALL mpi_send  (AsendN, sizeN, MP_FLOAT, Ntile, Ntag,           &
!>   &                  OCN_COMM_WORLD, Nerror)
!>
        CALL mpi_send  (ArecvN, sizeN, MP_FLOAT, Ntile, Ntag,           &
     &                  OCN_COMM_WORLD, Nerror)
      END IF
# endif
!
!  Adjoint of packing tile boundary data including ghost-points.
!
      IF (Sexchange) THEN            
# ifdef MPI
        CALL mpi_wait (Srequest, status(1,2), Serror)
        IF (Serror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Serror, string, LEN(string), Serror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Southern Edge)',       &
     &                      MyRank, Serror, string(1:Lstr)
 10       FORMAT (/,' AD_MP_EXCHANGE3D - error during ',a,' call,',     &
     &            ' Node = ', i3.3,' Error = ',i3,/,18x,a)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendS
          mc=(m-1)*IKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              ikS=1+(i-Imin)+kc+mc
!>            AsendS(ikS)=A(i,Jstr+m-1,k)
!>
              A(i,Jstr+m-1,k)=A(i,Jstr+m-1,k)+AsendS(ikS)
              AsendS(ikS)=0.0_r8
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=ioff+1+(i-Imin)+kc+mc
!>              AsendS(ikS)=B(i,Jstr+m-1,k)
!>
                B(i,Jstr+m-1,k)=B(i,Jstr+m-1,k)+AsendS(ikS)
                AsendS(ikS)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=ioff+1+(i-Imin)+kc+mc
!>              AsendS(ikS)=C(i,Jstr+m-1,k)
!>
                C(i,Jstr+m-1,k)=C(i,Jstr+m-1,k)+AsendS(ikS)
                AsendS(ikS)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=ioff+1+(i-Imin)+kc+mc
!>              AsendS(ikS)=D(i,Jstr+m-1,k)
!>
                D(i,Jstr+m-1,k)=D(i,Jstr+m-1,k)+AsendS(ikS)
                AsendS(ikS)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
# ifdef MPI
        CALL mpi_wait (Nrequest, status(1,4), Nerror)
        IF (Nerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Nerror, string, LEN(string), Nerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Northern Edge)',       &
     &                      MyRank, Nerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendN
          mc=(m-1)*IKlen
          DO k=LBk,UBk
            kc=(k-LBk)*Ilen
            DO i=Imin,Imax
              ikN=1+(i-Imin)+kc+mc
!>            AsendN(ikN)=A(i,Jend-GsendN+m,k)
!>
              A(i,Jend-GsendN+m,k)=A(i,Jend-GsendN+m,k)+AsendN(ikN)
              AsendN(ikN)=0.0_r8
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=ioff+1+(i-Imin)+kc+mc
!>              AsendN(ikN)=B(i,Jend-GsendN+m,k)
!>
                B(i,Jend-GsendN+m,k)=B(i,Jend-GsendN+m,k)+AsendN(ikN)
                AsendN(ikN)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processC) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=ioff+1+(i-Imin)+kc+mc
!>              AsendN(ikN)=C(i,Jend-GsendN+m,k)
!>
                C(i,Jend-GsendN+m,k)=C(i,Jend-GsendN+m,k)+AsendN(ikN)
                AsendN(ikN)=0.0_r8
              END DO
            END DO
          END DO
        END IF
        IF (processD) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=ioff+1+(i-Imin)+kc+mc
!>              AsendN(ikN)=D(i,Jend-GsendN+m,k)
!>
                D(i,Jend-GsendN+m,k)=D(i,Jend-GsendN+m,k)+AsendN(ikN)
                AsendN(ikN)=0.0_r8
              END DO
            END DO
          END DO
        END IF
      END IF
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 39)
# endif

      RETURN
      END SUBROUTINE ad_mp_exchange3d

      SUBROUTINE ad_mp_exchange4d (ng, model, Nvar,                     &
     &                             Istr, Iend, Jstr, Jend,              &
     &                             LBi, UBi, LBj, UBj, LBk, UBk,        &
     &                             LBt, UBt,                            &
     &                             Nghost, EWperiodic, NSperiodic,      &
     &                             A, B)
!
!=================================================== Andrew M. Moore ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  Adjoint Exchange Communications:                                    !
!                                                                      !
!  This routine updates the tile overlap halo of  NV 4D variables.     !
!  It exchanges the specified number of "ghost-points".  In  order     !
!  to minimize the number send and receive calls, the ghost-points     !
!  are included in the buffers.  Therefore, the order of the pack,     !
!  send, receive, and unpack is crucial.                               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nvar       Number of variables for aggregated exchanges.         !
!     Istr       Starting tile index in the I-direction.               !
!     Iend       Ending   tile index in the I-direction.               !
!     Jstr       Starting tile index in the J-direction.               !
!     Jend       Ending   tile index in the J-direction.               !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     LBk        K-dimension Lower bound.                              !
!     UBk        K-dimension Upper bound.                              !
!     LBt        T-dimension Lower bound.                              !
!     UBt        T-dimension Upper bound.                              !
!     Nghost     Number of ghost-points in the halo region.            !
!     EWperiodic Switch indicating EW periodicity exchanges.           !
!     NSperiodic Switch indicating NS periodicity exchanges.           !
!     A          4D tiled adjoint array to process.                    !
!     B          4D tiled adjoint array (optional) to process.         !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Updated 4D tiled adjoint array.                       !
!     B          Updated 4D tiled adjoint array (optional).            !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      logical, intent(in) :: EWperiodic, NSperiodic

      integer, intent(in) :: ng, model, Nvar
      integer, intent(in) :: Iend, Istr, Jend, Jstr
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk, LBt, UBt
      integer, intent(in) :: Nghost

      real(r8), intent(inout) :: A(LBi:UBi,LBj:UBj,LBk:UBk,LBt:UBt)

      real(r8), intent(inout), optional ::                              &
     &                           B(LBi:UBi,LBj:UBj,LBk:UBk,LBt:UBt)
!
!  Local variable declarations.
!
      logical :: Wexchange, Sexchange, Eexchange, Nexchange
      logical :: processB

      integer :: i, ikS, ikN, ioff, Imin, Imax, Ilen, IKlen, IKTlen
      integer :: j, jkW, jkE, joff, Jmin, Jmax, Jlen, JKlen, JKTlen
      integer :: k, kc, m, mc, Klen, Lstr, MyRankI, MyRankJ, rank
      integer :: l, lc, Tlen
      integer :: Wtile, GsendW, GrecvW, Wtag, Werror, Wrequest
      integer :: Stile, GsendS, GrecvS, Stag, Serror, Srequest
      integer :: Etile, GsendE, GrecvE, Etag, Eerror, Erequest
      integer :: Ntile, GsendN, GrecvN, Ntag, Nerror, Nrequest
      integer :: EWsize, sizeW, sizeE
      integer :: NSsize, sizeS, sizeN
      integer :: BufferSize
      integer :: Null_Value

      integer, dimension(-1:NtileI(ng),-1:NtileJ(ng)) :: table
# ifdef MPI
      integer, dimension(MPI_STATUS_SIZE,4) :: status
# endif

# if defined EW_PERIODIC || defined NS_PERIODIC
      integer, parameter :: pp = 1
# else
      integer, parameter :: pp = 0
# endif

      real(r8) :: AsendW(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: AsendE(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: AsendS(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: AsendN(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))

      real(r8) :: ArecvW(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: ArecvE(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: ArecvS(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))
      real(r8) :: ArecvN(Nvar*HaloSize(ng)*(UBk-LBk+1)*(UBt-LBt+1))

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 39)
# endif
!
!-----------------------------------------------------------------------
!  Send and recive tile boundary data.
!-----------------------------------------------------------------------
!
!  Set tile partition table for looking up adjacent processes. Notice
!  that a null value is used in places that data transmition is not
!  required.
!
# if defined MPI
      Null_Value=MPI_PROC_NULL
# else
      Null_Value=-1
# endif
      DO j=-1,NtileJ(ng)
        DO i=-1,NtileI(ng)
          table(i,j)=Null_Value
        END DO
      END DO
      rank=0
      DO j=0,NtileJ(ng)-1
        DO i=0,NtileI(ng)-1
          table(i,j)=rank
          IF (MyRank.eq.rank) THEN
            MyRankI=i
            MyRankJ=j
          END IF
          rank=rank+1            
        END DO
      END DO
!
!  Set boundary communication tags and exchange switches.
!
      Wtag=1
      Stag=2
      Etag=3
      Ntag=4
      Wexchange=.FALSE.
      Sexchange=.FALSE.
      Eexchange=.FALSE.
      Nexchange=.FALSE.
!
!  Set switches to process additional variables for aggregations.
!
      processB=.FALSE.
      IF (Nvar.gt.1) processB=.TRUE.
!
!-----------------------------------------------------------------------
!  Process Western and Eastern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Jmin=LBj
      Jmax=UBj
      Jlen=Jmax-Jmin+1
      Klen=UBk-LBk+1
      Tlen=UBt-LBt+1
      JKlen=Jlen*Klen
      JKTlen=JKlen*Tlen
      EWsize=Nvar*(Nghost+pp)*JKTlen
      BufferSize=Nvar*HaloSize(ng)*Klen*Tlen
!
!  Determine the rank of Western and Eastern tiles.  Then, determine
!  the number of ghost-points to send and receive in the West- and
!  East-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Lm-2  Lm-1  Lm   Lm+1  Lm+2   Lm+3
!                      -2    -1     0    1     2      3
!
      IF (EWperiodic) THEN
        IF ((table(MyRankI-1,MyRankJ).eq.Null_Value).and.               &
     &      (NtileI(ng).gt.1)) THEN
          Wtile=table(NtileI(ng)-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
# if defined THREE_GHOST
          GrecvW=Nghost
# else
          GrecvW=Nghost+1
# endif
          GrecvE=Nghost
        ELSE IF ((table(MyRankI+1,MyRankJ).eq.Null_Value).and.          &
     &           (NtileI(ng).gt.1)) THEN
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(0,MyRankJ)
          GsendW=Nghost
# if defined THREE_GHOST
          GsendE=Nghost
# else
          GsendE=Nghost+1
# endif
          GrecvW=Nghost
          GrecvE=Nghost
        ELSE
          Wtile=table(MyRankI-1,MyRankJ)
          Etile=table(MyRankI+1,MyRankJ)
          GsendW=Nghost
          GsendE=Nghost
          GrecvW=Nghost
          GrecvE=Nghost
        END IF
      ELSE
        Wtile=table(MyRankI-1,MyRankJ)
        Etile=table(MyRankI+1,MyRankJ)
        GsendW=Nghost
        GsendE=Nghost
        GrecvW=Nghost
        GrecvE=Nghost
      END IF
      IF (Wtile.ne.Null_Value) Wexchange=.TRUE.
      IF (Etile.ne.Null_Value) Eexchange=.TRUE.
!
!-----------------------------------------------------------------------
!  Adjoint of unpack Eastern and Western segments.
!-----------------------------------------------------------------------
!
      IF (Eexchange) THEN
        DO i=1,BufferSize
          ArecvE(i)=0.0_r8
          AsendE(i)=0.0_r8
        END DO
        sizeE=0
        DO m=1,GrecvE
          mc=(m-1)*JKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeE=sizeE+1
                jkE=1+(j-Jmin)+kc+lc+mc
!>              A(Iend+m,j,k,l)=ArecvE(jkE)
!>
                ArecvE(jkE)=A(Iend+m,j,k,l)
                A(Iend+m,j,k,l)=0.0_r8
              END DO
            END DO
          ENDDO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GrecvE
            mc=(m-1)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  sizeE=sizeE+1
                  jkE=joff+1+(j-Jmin)+kc+lc+mc
!>                B(Iend+m,j,k,l)=ArecvE(jkE)
!>
                  ArecvE(jkE)=B(Iend+m,j,k,l)
                  B(Iend+m,j,k,l)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Wexchange) THEN
        DO i=1,BufferSize
          ArecvW(i)=0.0_r8
          AsendW(i)=0.0_r8
        END DO
        sizeW=0
        DO m=GrecvW,1,-1
          mc=(GrecvW-m)*JKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                sizeW=sizeW+1
                jkW=1+(j-Jmin)+kc+lc+mc
!>              A(Istr-m,j,k,l)=ArecvW(jkW)
!>
                ArecvW(jkW)=A(Istr-m,j,k,l)
                A(Istr-m,j,k,l)=0.0_r8
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=GrecvW,1,-1
            mc=(GrecvW-m)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  sizeW=sizeW+1
                  jkW=joff+1+(j-Jmin)+kc+lc+mc
!>                B(Istr-m,j,k,l)=ArecvW(jkW)
!>
                  ArecvW(jkW)=B(Istr-m,j,k,l)
                  B(Istr-m,j,k,l)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Send and receive Western and Eastern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Wexchange) THEN
!>      CALL mpi_irecv (ArecvW, EWsize, MP_FLOAT, Wtile, Etag,          &
!>   &                  OCN_COMM_WORLD, Wrequest, Werror)
!>
        CALL mpi_irecv (AsendW, EWsize, MP_FLOAT, Wtile, Etag,          &
     &                  OCN_COMM_WORLD, Wrequest, Werror)
      END IF
      IF (Eexchange) THEN
!>      CALL mpi_irecv (ArecvE, EWsize, MP_FLOAT, Etile, Wtag,          &
!>   &                  OCN_COMM_WORLD, Erequest, Eerror)
!>
        CALL mpi_irecv (AsendE, EWsize, MP_FLOAT, Etile, Wtag,          &
     &                  OCN_COMM_WORLD, Erequest, Eerror)
      END IF
      IF (Wexchange) THEN
!>      CALL mpi_send  (AsendW, sizeW, MP_FLOAT, Wtile, Wtag,           &
!>   &                  OCN_COMM_WORLD, Werror)
!>
        CALL mpi_send  (ArecvW, sizeW, MP_FLOAT, Wtile, Wtag,           &
     &                  OCN_COMM_WORLD, Werror)
      END IF
      IF (Eexchange) THEN
!>      CALL mpi_send  (AsendE, sizeE, MP_FLOAT, Etile, Etag,           &
!>   &                  OCN_COMM_WORLD, Eerror)
!>
        CALL mpi_send  (ArecvE, sizeE, MP_FLOAT, Etile, Etag,           &
     &                  OCN_COMM_WORLD, Eerror)
      END IF
# endif
!
!  Adjoint of packing tile boundary data including ghost-points.
!
      IF (Wexchange) THEN
# ifdef MPI
        CALL mpi_wait (Wrequest, status(1,1), Werror)
        IF (Werror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Werror, string, LEN(string), Werror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Western Edge)',        &
     &                      MyRank, Werror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendW
          mc=(m-1)*JKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkW=1+(j-Jmin)+kc+lc+mc
!>              AsendW(jkW)=A(Istr+m-1,j,k,l)
!>
                A(Istr+m-1,j,k,l)=A(Istr+m-1,j,k,l)+                    &
     &                            AsendW(jkW)
                AsendW(jkW)=0.0_r8
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkW
          DO m=1,GsendW
            mc=(m-1)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  jkW=joff+1+(j-Jmin)+kc+lc+mc
!>                AsendW(jkW)=B(Istr+m-1,j,k,l)
!>
                  B(Istr+m-1,j,k,l)=B(Istr+m-1,j,k,l)+                  &
     &                              AsendW(jkW)
                  AsendW(jkW)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Eexchange) THEN
# ifdef MPI
        CALL mpi_wait (Erequest, status(1,3), Eerror)
        IF (Eerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Eerror, string, LEN(string), Eerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Eastern Edge)',        &
     &                      MyRank, Eerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendE
          mc=(m-1)*JKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*JKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Jlen
              DO j=Jmin,Jmax
                jkE=1+(j-Jmin)+kc+lc+mc
!>              AsendE(jkE)=A(Iend-GsendE+m,j,k,l)
!>
                A(Iend-GsendE+m,j,k,l)=A(Iend-GsendE+m,j,k,l)+          &
     &                                 AsendE(jkE)
                AsendE(jkE)=0.0_r8
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          joff=jkE
          DO m=1,GsendE
            mc=(m-1)*JKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*JKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Jlen
                DO j=Jmin,Jmax
                  jkE=joff+1+(j-Jmin)+kc+lc+mc
!>                AsendE(jkE)=B(Iend-GsendE+m,j,k,l)
!>
                  B(Iend-GsendE+m,j,k,l)=B(Iend-GsendE+m,j,k,l)+        &
     &                                   AsendE(jkE)
                  AsendE(jkE)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Process Southern and Northern tile boundary data.
!-----------------------------------------------------------------------
!
!  Determine range and length of the distributed tile boundary segments.
!
      Imin=LBi
      Imax=UBi
      Ilen=Imax-Imin+1
      Klen=UBk-LBk+1
      Tlen=UBt-LBt+1
      IKlen=Ilen*Klen
      IKTlen=IKlen*Tlen
      NSsize=Nvar*(Nghost+pp)*IKTlen
!
!  Determine the rank of Southern and Northern tiles.  Then, determine
!  the number of ghost-points to send and receive in the South- and
!  North-directions.  This logic only works for two and three ghost
!  points. The number of ghost-points changes when periodic boundary
!  condition are activated.  The periodicity is as follows:
!
!  If two ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2
!                      -2    -1     0    1     2
!
!  If three ghost-points:
!
!                      Mm-2  Mm-1  Mm   Mm+1  Mm+2  Mm+3
!                      -2    -1     0    1     2     3
!
      IF (NSperiodic) THEN
        IF ((table(MyRankI,MyRankJ-1).eq.Null_Value).and.               &
     &      (NtileJ(ng).gt.1)) THEN
          Stile=table(MyRankI,NtileJ(ng)-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
# if defined THREE_GHOST
          GrecvS=Nghost
# else
          GrecvS=Nghost+1
# endif
          GrecvN=Nghost
        ELSE IF ((table(MyRankI,MyRankJ+1).eq.Null_Value).and.          &
     &           (NtileJ(ng).gt.1)) then
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,0)
          GsendS=Nghost
# if defined THREE_GHOST
          GsendN=Nghost
# else
          GsendN=Nghost+1
# endif
          GrecvS=Nghost
          GrecvN=Nghost
        ELSE
          Stile=table(MyRankI,MyRankJ-1)
          Ntile=table(MyRankI,MyRankJ+1)
          GsendS=Nghost
          GsendN=Nghost
          GrecvS=Nghost
          GrecvN=Nghost
        END IF
      ELSE
        Stile=table(MyRankI,MyRankJ-1)
        Ntile=table(MyRankI,MyRankJ+1)
        GsendS=Nghost
        GsendN=Nghost
        GrecvS=Nghost
        GrecvN=Nghost
      END IF
      IF (Stile.ne.Null_Value) Sexchange=.TRUE.
      IF (Ntile.ne.Null_Value) Nexchange=.TRUE.
!
!-----------------------------------------------------------------------
!  Adjoint of unpacking Northern and Southern segments.
!-----------------------------------------------------------------------
!
      IF (Nexchange) THEN
        DO i=1,BufferSize
          ArecvN(i)=0.0_r8
          AsendN(i)=0.0_r8
        END DO
        sizeN=0
        DO m=1,GrecvN
          mc=(m-1)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeN=sizeN+1
                ikN=1+(i-Imin)+kc+lc+mc
!>              A(i,Jend+m,k,l)=ArecvN(ikN)
!>
                ArecvN(ikN)=A(i,Jend+m,k,l)
                A(i,Jend+m,k,l)=0.0_r8
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GrecvN
            mc=(m-1)*IKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  sizeN=sizeN+1
                  ikN=ioff+1+(i-Imin)+kc+lc+mc
!>                B(i,Jend+m,k,l)=ArecvN(ikN)
!>
                  ArecvN(ikN)=B(i,Jend+m,k,l)
                  B(i,Jend+m,k,l)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Sexchange) THEN
        DO i=1,BufferSize
          ArecvS(i)=0.0_r8
          AsendS(i)=0.0_r8
        END DO
        sizeS=0
        DO m=GrecvS,1,-1
          mc=(GrecvS-m)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                sizeS=sizeS+1
                ikS=1+(i-Imin)+kc+lc+mc
!>              A(i,Jstr-m,k,l)=ArecvS(ikS)
!>
                ArecvS(ikS)=A(i,Jstr-m,k,l)
                A(i,Jstr-m,k,l)=0.0_r8
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=GrecvS,1,-1
            mc=(GrecvS-m)*IKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  sizeS=sizeS+1
                  ikS=ioff+1+(i-Imin)+kc+lc+mc
!>                B(i,Jstr-m,k,l)=ArecvS(ikS)
!>
                  ArecvS(ikS)=B(i,Jstr-m,k,l)
                  B(i,Jstr-m,k,l)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
!-----------------------------------------------------------------------
!  Adjoint of send and receive Southern and Northern segments.
!-----------------------------------------------------------------------
!
# if defined MPI
      IF (Sexchange) THEN
!>      CALL mpi_irecv (ArecvS, NSsize, MP_FLOAT, Stile, Ntag,          &
!>   &                  OCN_COMM_WORLD, Srequest, Serror)
!>
        CALL mpi_irecv (AsendS, NSsize, MP_FLOAT, Stile, Ntag,          &
     &                  OCN_COMM_WORLD, Srequest, Serror)
      END IF
      IF (Nexchange) THEN
!>      CALL mpi_irecv (ArecvN, NSsize, MP_FLOAT, Ntile, Stag,          &
!>   &                  OCN_COMM_WORLD, Nrequest, Nerror)
!>
        CALL mpi_irecv (AsendN, NSsize, MP_FLOAT, Ntile, Stag,          &
     &                  OCN_COMM_WORLD, Nrequest, Nerror)
      END IF
      IF (Sexchange) THEN
!>      CALL mpi_send  (AsendS, sizeS, MP_FLOAT, Stile, Stag,           &
!>   &                  OCN_COMM_WORLD, Serror)
!>
        CALL mpi_send  (ArecvS, sizeS, MP_FLOAT, Stile, Stag,           &
     &                  OCN_COMM_WORLD, Serror)
      END IF
      IF (Nexchange) THEN
!>      CALL mpi_send  (AsendN, sizeN, MP_FLOAT, Ntile, Ntag,           &
!>   &                  OCN_COMM_WORLD, Nerror)
!>
        CALL mpi_send  (ArecvN, sizeN, MP_FLOAT, Ntile, Ntag,           &
     &                  OCN_COMM_WORLD, Nerror)
      END IF
# endif
!
!  Adjoint of packing tile boundary data including ghost-points.
!
      IF (Sexchange) THEN            
# ifdef MPI
        CALL mpi_wait (Srequest, status(1,2), Serror)
        IF (Serror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Serror, string, LEN(string), Serror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Southern Edge)',       &
     &                      MyRank, Serror, string(1:Lstr)
 10       FORMAT (/,' AD_MP_EXCHANGE4D - error during ',a,' call,',     &
     &            ' Node = ', i3.3,' Error = ',i3,/,18x,a)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendS
          mc=(m-1)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikS=1+(i-Imin)+kc+lc+mc
!>              AsendS(ikS)=A(i,Jstr+m-1,k,l)
!>
                A(i,Jstr+m-1,k,l)=A(i,Jstr+m-1,k,l)+                    &
     &                            AsendS(ikS)
                AsendS(ikS)=0.0_r8
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikS
          DO m=1,GsendS
            mc=(m-1)*IKTlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  ikS=ioff+1+(i-Imin)+kc+lc+mc
!>                AsendS(ikS)=B(i,Jstr+m-1,k,l)
!>
                  B(i,Jstr+m-1,k,l)=B(i,Jstr+m-1,k,l)+                  &
     &                              AsendS(ikS)
                  AsendS(ikS)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
!
      IF (Nexchange) THEN
# ifdef MPI
        CALL mpi_wait (Nrequest, status(1,4), Nerror)
        IF (Nerror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (Nerror, string, LEN(string), Nerror)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_SEND/MPI_IRECV (Northern Edge)',       &
     &                      MyRank, Nerror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
# endif
        DO m=1,GsendN
          mc=(m-1)*IKTlen
          DO l=LBt,UBt
            lc=(l-LBt)*IKlen
            DO k=LBk,UBk
              kc=(k-LBk)*Ilen
              DO i=Imin,Imax
                ikN=1+(i-Imin)+kc+lc+mc
!>              AsendN(ikN)=A(i,Jend-GsendN+m,k,l)
!>
                A(i,Jend-GsendN+m,k,l)=A(i,Jend-GsendN+m,k,l)+          &
     &                                 AsendN(ikN)
                AsendN(ikN)=0.0_r8
              END DO
            END DO
          END DO
        END DO
        IF (processB) THEN
          ioff=ikN
          DO m=1,GsendN
            mc=(m-1)*IKlen
            DO l=LBt,UBt
              lc=(l-LBt)*IKlen
              DO k=LBk,UBk
                kc=(k-LBk)*Ilen
                DO i=Imin,Imax
                  ikN=ioff+1+(i-Imin)+kc+lc+mc
!>                AsendN(ikN)=B(i,Jend-GsendN+m,k)
!>
                  B(i,Jend-GsendN+m,k,l)=B(i,Jend-GsendN+m,k,l)+        &
     &                                   AsendN(ikN)
                  AsendN(ikN)=0.0_r8
                END DO
              END DO
            END DO
          END DO
        END IF
      END IF
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 39)
# endif

      RETURN
      END SUBROUTINE ad_mp_exchange4d

      SUBROUTINE mp_gather (ng, model, LBi, UBi, LBj, UBj, LBk, UBk,    &
     &                      gtype, Ascl,                                &
# ifdef MASKING
     &                      Amask,                                      &
# endif
     &                      A, Npts, Aout)
!
!=======================================================================
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  This routine collects requested real tile array (2D or 3D) from     !
!  each spawned MPI node and stores it into one dimensional global     !
!  array. This routine is used by the  Master Node  to collect and     !
!  pack output data.                                                   !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     LBk        K-dimension Lower bound, if any. Otherwise, a value   !
!                  of one is expected.                                 !
!     LBk        K-dimension Upper bound, if any. Otherwise, a value   !
!                  of one is expected.                                 !
!     UBk        K-dimension Upper bound.                              !
!     gtype      C-grid type. If negative and Land-Sea is available,   !
!                  only water-points processed.                        !
!     Ascl       Factor to scale field before writing.                 !
!     Amask      Land/Sea mask, if any.                                !
!     A          Tile array (2D or 3D) to process.                     !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     Npts       Number of points processed in Aout.                   !
!     Aout       Collected data from each node packed into 1D array    !
!                  in column-major order. That is, in the same way     !
!                  that Fortran multi-dimensional arrays are stored    !
!                  in memory.                                          !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_ncparam
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, gtype
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk
      integer, intent(out) :: Npts

      real(r8), intent(in) :: Ascl

# ifdef MASKING
      real(r8), intent(in) :: Amask(LBi:UBi,LBj:UBj)
# endif
      real(r8), intent(in)  :: A(LBi:UBi,LBj:UBj,LBk:UBk)
      real(r8), intent(out) :: Aout((Lm(ng)+2)*(Mm(ng)+2)*(UBk-LBk+1))
!
!  Local variable declarations.
!
      integer :: Itile, Jtile, Nghost
      integer :: Io, Ie, Jo, Je, Ioff, Joff, Koff
      integer :: Ilen, Jlen, Klen, IJlen
      integer :: Lstr, MyError, MyType, Srequest
      integer :: i, ic, j, jc, k, kc, np, rank

      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Imin, Imax
      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Jmin, Jmax
      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: MySize
      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Rrequest

      integer, dimension(MPI_STATUS_SIZE) :: Rstatus
      integer, dimension(MPI_STATUS_SIZE) :: Sstatus

      real(r8), dimension(TileSize(ng)) :: Asend

      real(r8), dimension(TileSize(ng),                                 &
     &                    NtileI(ng)*NtileJ(ng)-1) :: Arecv

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 42)
# endif
!
!-----------------------------------------------------------------------
!  Set horizontal starting and ending indices for parallel domain
!  partitions in the XI- and ETA-directions.
!-----------------------------------------------------------------------
!
!  Set first and last grid point according to staggered C-grid
!  classification.
!
      Io=0
      Ie=Lm(ng)+1
      Jo=0
      Je=Mm(ng)+1
      MyType=ABS(gtype)
      IF ((MyType.eq.p2dvar).or.(MyType.eq.u2dvar).or.                  &
     &    (MyType.eq.p3dvar).or.(MyType.eq.u3dvar)) Io=1
      IF ((MyType.eq.p2dvar).or.(MyType.eq.v2dvar).or.                  &
     &    (MyType.eq.p3dvar).or.(MyType.eq.v3dvar)) Jo=1
      IF (Io.eq.0) THEN
        Ioff=1
      ELSE
        Ioff=0
      END IF
      IF (Jo.eq.0) THEN
        Joff=0
      ELSE
        Joff=1
      END IF
      IF (LBk.eq.0) THEN
        Koff=0
      ELSE
        Koff=1
      END IF
      Ilen=Ie-Io+1
      Jlen=Je-Jo+1
      Klen=UBk-LBk+1
      IJlen=Ilen*Jlen
      Npts=IJlen*Klen
!
!  Set physical, non-overlaping (no ghost-points) ranges according to
!  tile rank. Compute size of distributed buffers.
!
      Nghost=0
      DO rank=0,NtileI(ng)*NtileJ(ng)-1
        CALL get_bounds (ng, rank, gtype, Nghost, Itile, Jtile,         &
     &                   Imin(rank), Imax(rank),                        &
     &                   Jmin(rank), Jmax(rank))
        MySize(rank)=(Imax(rank)-Imin(rank)+1)*                         &
     &               (Jmax(rank)-Jmin(rank)+1)*(UBk-LBk+1)
      END DO
!
!-----------------------------------------------------------------------
!  Collect requested array data. 
!-----------------------------------------------------------------------
!
!  Pack and scale input data.
!
      np=0
      DO k=LBk,UBk
        DO j=Jmin(MyRank),Jmax(MyRank)
          DO i=Imin(MyRank),Imax(MyRank)
            np=np+1
            Asend(np)=A(i,j,k)*Ascl
          END DO
        END DO
      END DO
# ifdef MASKING
!
!  If processing water-points only, flag land-points.
!
      IF (gtype.lt.0) THEN
        np=0
        DO k=LBk,UBk
          DO j=Jmin(MyRank),Jmax(MyRank)
            DO i=Imin(MyRank),Imax(MyRank)
              np=np+1
              IF (Amask(i,j).eq.0.0_r8) THEN
                Asend(np)=spval
              END IF
            END DO
          END DO
        END DO
      END IF
# endif
!
!  If master processor, unpack the send buffer since there is not
!  need to distribute.
!
      IF (MyRank.eq.MyMaster) THEN
        np=0
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          DO j=Jmin(MyRank),Jmax(MyRank)
            jc=(j-Joff)*Ilen+kc
            DO i=Imin(MyRank),Imax(MyRank)
              np=np+1
              ic=i+Ioff+jc
              Aout(ic)=Asend(np)
            END DO
          END DO
        END DO
      END IF
!
!  Send, receive, and unpack data.
!
      IF (MyRank.eq.MyMaster) THEN
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          CALL mpi_irecv (Arecv(1,rank), MySize(rank), MP_FLOAT, rank,  &
     &                    rank+5, OCN_COMM_WORLD, Rrequest(rank),       &
     &                    MyError)
        END DO
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          CALL mpi_wait (Rrequest(rank), Rstatus, MyError)
          IF (MyError.ne.MPI_SUCCESS) THEN
            CALL mpi_error_string (MyError, string, LEN(string),        &
     &                             MyError)
            Lstr=LEN_TRIM(string)
            WRITE (stdout,10) 'MPI_IRECV', rank, MyError, string(1:Lstr)
 10         FORMAT (/,' MP_GATHER - error during ',a,' call, Node = ',  &
     &              i3.3,' Error = ',i3,/,13x,a)
            exit_flag=2
            RETURN
          END IF
          np=0
          DO k=LBk,UBk
            kc=(k-Koff)*IJlen
            DO j=Jmin(rank),Jmax(rank)
              jc=(j-Joff)*Ilen+kc
              DO i=Imin(rank),Imax(rank)
                np=np+1
                ic=i+Ioff+jc
                Aout(ic)=Arecv(np,rank)
              END DO
            END DO
          END DO
        END DO
      ELSE
        CALL mpi_isend (Asend, MySize(MyRank), MP_FLOAT, MyMaster,      &
     &                  MyRank+5, OCN_COMM_WORLD, Srequest, MyError)
        CALL mpi_wait (Srequest, Sstatus, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_ISEND', MyRank, MyError, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
      END IF

# ifdef MASKING
!
! If pocessing only water-points, remove land points and repack.
!
      IF ((MyRank.eq.MyMaster).and.(gtype.lt.0)) THEN
        ic=0
        np=IJlen*Klen
        DO i=1,np
          IF (Aout(i).lt.spval) THEN
            ic=ic+1
            Aout(ic)=Aout(i)
          END IF
        END DO
        Npts=ic
      END IF
# endif
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 42)
# endif

      RETURN
      END SUBROUTINE mp_gather

      SUBROUTINE mp_gather_state (ng, model, Nstr, Nend, Asize,         &
     &                            A, Aout)
!
!=======================================================================
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  This routine gathers (threaded to global) state data to all nodes   !
!  in the group. This  routine  is used to unpack the state data for   !
!  the GST analysis propagators.                                       !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nstr       Threaded array lower bound.                           !
!     Nend       Threaded array upper bound.                           !
!     Asize      Size of the full state.                               !
!     A          Threaded 1D array process.                            !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     Aout       Collected data from each node packed into 1D full     !
!                  state array.                                        !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_ncparam
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model
      integer, intent(in) :: Nstr, Nend, Asize

      real(r8), intent(in)  :: A(Nstr:Nend)
      real(r8), intent(out) :: Aout(Asize)
!
!  Local variable declarations.
!
      integer :: LB, Lstr, MyError, i, np, rank, request

      integer :: bounds(2)
      integer, dimension(MPI_STATUS_SIZE) :: status
      integer, dimension(2,0:NtileI(ng)*NtileJ(ng)-1) :: Abounds

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 42)
# endif
!
!-----------------------------------------------------------------------
!  Collect data from all nodes.
!-----------------------------------------------------------------------
!
!  Collect data lower and upper bound dimensions.
!
      bounds(1)=Nstr
      bounds(2)=Nend
      CALL mpi_gather (bounds, 2, MPI_INTEGER, Abounds, 2, MPI_INTEGER, &
     &                 MyMaster, OCN_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_GATHER', MyRank, MyError, string(1:Lstr)
 10     FORMAT (/,' MP_GATHER_STATE - error during ',a,                 &
     &          ' call, Node = ',i3.3,' Error = ',i3,/,13x,a)
        exit_flag=2
        RETURN
      END IF
!
!  If master node, loop over other nodes and receive the data.
!
      IF (MyRank.eq.MyMaster) THEN
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          np=Abounds(2,rank)-Abounds(1,rank)+1
          LB=Abounds(1,rank)
          CALL mpi_irecv (Aout(LB:), np, MP_FLOAT, rank, rank+5,        &
     &                    OCN_COMM_WORLD, request, MyError)
          CALL mpi_wait (request, status, MyError)
          IF (MyError.ne.MPI_SUCCESS) THEN
            CALL mpi_error_string (MyError, string, LEN(string),        &
     &                             MyError)
            Lstr=LEN_TRIM(string)
            WRITE (stdout,10) 'MPI_IRECV', rank, MyError, string(1:Lstr)
            exit_flag=2
            RETURN
          END IF
        END DO
!
!  Load master node contribution.
!
        DO i=Nstr,Nend
          Aout(i)=A(i)
        END DO
!
!  Otherwise, send data to master node.
!
      ELSE
        np=Nend-Nstr+1
        CALL mpi_isend (A(Nstr:), np, MP_FLOAT, MyMaster, MyRank+5,     &
     &                  OCN_COMM_WORLD, request, MyError)
        CALL mpi_wait (request, status, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_ISEND', MyRank, MyError, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
      END IF
!
!  Broadcast collected data to all nodes.
!
      CALL mpi_bcast (Aout, Asize, MP_FLOAT, MyMaster, OCN_COMM_WORLD,  &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
        exit_flag=2
        RETURN
      END IF

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 42)
# endif

      RETURN
      END SUBROUTINE mp_gather_state

      FUNCTION mp_ncread (ng, model, ncid, ncvar, ncfile, ncrec,        &
     &                    LB1, UB1, LB2, UB2, Ascale, A)
!
!=======================================================================
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  This function reads floating point data from specified NetCDF file  !
!  and scatters it to the other nodes.  This  routine is used to read  !
!  model state vectors or matrices. If both LB2 and UB2 are zero, its  !
!  assumed that the second dimension is a parallel node dimension.     !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     ncid       NetCDF file ID.                                       !
!     ncvar      NetCDF variable name.                                 !
!     ncfile     NetCDF file name.                                     !
!     ncrec      NetCDF record index to write. If negative, it assumes !
!                  that the variable is recordless.                    !
!     LB1        First-dimension Lower bound.                          !
!     UB1        First-dimension Upper bound.                          !
!     LB2        Second-dimension Lower bound.                         !
!     UB2        Second-dimension Upper bound.                         !
!     Ascale     Factor to scale field after reading (real).           !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Field to read in (real).                              !
!     mp_ncread  Error flag (integer).                                 !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits
      USE mod_ncparam
      USE mod_netcdf
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, ncid, ncrec
      integer, intent(in) :: LB1, UB1, LB2, UB2

      real(r8), intent(in) :: Ascale

      real(r8), intent(out) :: A(LB1:UB1,LB2:UB2)

      character (len=*), intent(in) :: ncvar
      character (len=*), intent(in) :: ncfile
!
!  Local variable declarations.
!
      logical :: IsNodeDim

      integer :: Lstr, MyError, Npts, i, j, np, rank, request, varid
      integer :: bounds(4), start(2), total(2)

      integer, dimension(MPI_STATUS_SIZE) :: status
      integer, dimension(4,0:NtileI(ng)*NtileJ(ng)-1) :: Asize

      integer :: mp_ncread

      real(r8), allocatable :: Asend(:)

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 43)
# endif
!
!-----------------------------------------------------------------------
!  Read requested NetCDF file and scatter it to all nodes.
!-----------------------------------------------------------------------
!
      mp_ncread=nf_noerr
      IF ((LB2.eq.0).and.(UB2.eq.0)) THEN
        IsNodeDim=.TRUE.
      ELSE
        IsNodeDim=.FALSE.
      END IF
!
!  Collect data lower and upper bounds dimensions.
!
      bounds(1)=LB1
      bounds(2)=UB1
      bounds(3)=LB2
      bounds(4)=UB2
      CALL mpi_gather (bounds, 4, MPI_INTEGER, Asize, 4, MPI_INTEGER,   &
     &                 MyMaster, OCN_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_GATHER', MyRank, MyError,                &
     &                    string(1:Lstr)
        exit_flag=2
        RETURN
      END IF
!
!  If not master node, receive data from master node.
!
      IF (MyRank.ne.MyMaster) THEN
        np=(UB1-LB1+1)*(UB2-LB2+1)
        CALL mpi_irecv (A(LB1,LB2), np, MP_FLOAT, MyMaster, MyRank+5,   &
     &                  OCN_COMM_WORLD, request, MyError)
        CALL mpi_wait (request, status, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_IRECV', MyRank, MyError, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
!
!  Scale recieved (read) data.
!        
        DO j=LB2,UB2
          DO i=LB1,UB1
            A(i,j)=A(i,j)*Ascale
          END DO
        END DO
!
!  Otherwise, if master node allocate the send buffer.
!
      ELSE
        Npts=0
        DO rank=0,NtileI(ng)*NtileJ(ng)-1
          np=(Asize(2,rank)-Asize(1,rank)+1)*                           &
     &       (Asize(4,rank)-Asize(3,rank)+1)
          Npts=MAX(Npts, np)
        END DO
        IF (.not.allocated(Asend)) THEN
          allocate (Asend(Npts))
        END IF
!
!  If master node, loop over all nodes and read buffers to send.
!
        DO rank=0,NtileI(ng)*NtileJ(ng)-1
          start(1)=Asize(1,rank)
          total(1)=Asize(2,rank)-Asize(1,rank)+1
          IF (IsNodeDim) THEN
            start(2)=rank+1
            total(2)=1
          ELSE
            start(2)=Asize(3,rank)
            total(2)=Asize(4,rank)-Asize(3,rank)+1
          END IF
          mp_ncread=nf_inq_varid(ncid, TRIM(ncvar), varid)
          mp_ncread=nf_get_vara_TYPE(ncid, varid, start, total, Asend)
!
!  Send buffer to all nodes, except itself.
!
          IF (rank.eq.MyMaster) THEN
            np=0
            DO j=LB2,UB2
              DO i=LB1,UB1
                np=np+1
                A(i,j)=Asend(np)*Ascale
              END DO
            END DO
          ELSE
            np=(Asize(2,rank)-Asize(1,rank)+1)*                         &
     &         (Asize(4,rank)-Asize(3,rank)+1)
            CALL mpi_isend (Asend, np, MP_FLOAT, rank, rank+5,          &
     &                      OCN_COMM_WORLD, request, MyError)
            CALL mpi_wait (request, status, MyError)
            IF (MyError.ne.MPI_SUCCESS) THEN
              CALL mpi_error_string (MyError, string, LEN(string),      &
     &                               MyError)
              Lstr=LEN_TRIM(string)
              WRITE (stdout,10) 'MPI_ISEND', rank, MyError,             &
     &                          string(1:Lstr)
              exit_flag=2
              RETURN
            END IF
          END IF
        END DO
      END IF
!
!  Deallocate send buffer.
!
      IF (allocated(Asend).and.(MyRank.eq.MyMaster)) THEN
        deallocate (Asend)
      END IF

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 43)
# endif

 10   FORMAT (/,' MP_NCREAD - error during ',a,' call, Node = ',        &
     &          i3.3,' Error = ',i3,/,13x,a)

      RETURN
      END FUNCTION mp_ncread

      FUNCTION mp_ncwrite (ng, model, ncid, ncvar, ncfile, ncrec,       &
     &                     LB1, UB1, LB2, UB2, Ascale, A)
!
!=======================================================================
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  This function collects floating point data from the other nodes and !
!  writes it into specified NetCDF file. This routine is used to write !
!  model state vectors or matrices. It boths LB2 and UB2 are zero, its !
!  assumed that the second dimension is a parallel node dimension.     !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     ncid       NetCDF file ID.                                       !
!     ncvar      NetCDF variable name.                                 !
!     ncfile     NetCDF file name.                                     !
!     ncrec      NetCDF record index to write. If negative, it assumes !
!                  that the variable is recordless.                    !
!     LB1        First-dimension Lower bound.                          !
!     UB1        First-dimension Upper bound.                          !
!     LB2        Second-dimension Lower bound.                         !
!     UB2        Second-dimension Upper bound.                         !
!     Ascale     Factor to scale field before writing (real).          !
!     A          Field to write out (real).                            !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     mp_ncwrite Error flag (integer).                                 !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits
      USE mod_ncparam
      USE mod_netcdf
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, ncid, ncrec
      integer, intent(in) :: LB1, UB1, LB2, UB2

      real(r8), intent(in) :: Ascale

      real(r8), intent(in) :: A(LB1:UB1,LB2:UB2)

      character (len=*), intent(in) :: ncvar
      character (len=*), intent(in) :: ncfile
!
!  Local variable declarations.
!
      logical :: IsNodeDim

      integer :: Lstr, MyError, Npts, i, j, np, rank, request, varid
      integer :: bounds(4), start(2), total(2)

      integer, dimension(MPI_STATUS_SIZE) :: status
      integer, dimension(4,0:NtileI(ng)*NtileJ(ng)-1) :: Asize

      integer :: mp_ncwrite

      real(r8), allocatable :: Arecv(:)

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 42)
# endif
!
!-----------------------------------------------------------------------
!  Collect and write data into requested NetCDF file.
!-----------------------------------------------------------------------
!
      mp_ncwrite=nf_noerr
      IF ((LB2.eq.0).and.(UB2.eq.0)) THEN
        IsNodeDim=.TRUE.
      ELSE
        IsNodeDim=.FALSE.
      END IF
!
!  Collect data lower and upper bounds dimensions.
!
      bounds(1)=LB1
      bounds(2)=UB1
      bounds(3)=LB2
      bounds(4)=UB2
      CALL mpi_gather (bounds, 4, MPI_INTEGER, Asize, 4, MPI_INTEGER,   &
     &                 MyMaster, OCN_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_GATHER', MyRank, MyError,                &
     &                    string(1:Lstr)
        exit_flag=2
        RETURN
      END IF
!
!  If master node, allocate the receive buffer.
!
      IF (MyRank.eq.MyMaster) THEN
        Npts=0
        DO rank=0,NtileI(ng)*NtileJ(ng)-1
          np=(Asize(2,rank)-Asize(1,rank)+1)*                           &
     &       (Asize(4,rank)-Asize(3,rank)+1)
          Npts=MAX(Npts, np)
        END DO
        IF (.not.allocated(Arecv)) THEN
          allocate (Arecv(Npts))
        END IF
!
!  Write out master node contribution.
!
        start(1)=LB1
        total(1)=UB1-LB1+1
        IF (IsNodeDim) THEN
          start(2)=MyRank+1
          total(2)=1
        ELSE
          start(2)=LB2
          total(2)=UB2-LB2+1
        END IF
        np=0
        DO j=LB2,UB2
          DO i=LB1,UB1
            np=np+1
            Arecv(np)=A(i,j)
          END DO
        END DO
        mp_ncwrite=nf_inq_varid(ncid, TRIM(ncvar), varid)
        mp_ncwrite=nf_put_vara_TYPE(ncid, varid, start, total, Arecv)
!
!  If master node, loop over other nodes and receive the data.
!
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          np=(Asize(2,rank)-Asize(1,rank)+1)*                           &
     &       (Asize(4,rank)-Asize(3,rank)+1)
          CALL mpi_irecv (Arecv, np, MP_FLOAT, rank, rank+5,            &
     &                    OCN_COMM_WORLD, request, MyError)
          CALL mpi_wait (request, status, MyError)
          IF (MyError.ne.MPI_SUCCESS) THEN
            CALL mpi_error_string (MyError, string, LEN(string),        &
     &                             MyError)
            Lstr=LEN_TRIM(string)
            WRITE (stdout,10) 'MPI_IRECV', rank, MyError, string(1:Lstr)
            exit_flag=2
            RETURN
          END IF
!
!  Write out data into NetCDF file.
!
          start(1)=Asize(1,rank)
          total(1)=Asize(2,rank)-Asize(1,rank)+1
          IF (IsNodeDim) THEN
            start(2)=rank+1
            total(2)=1
          ELSE
            start(2)=Asize(3,rank)
            total(2)=Asize(4,rank)-Asize(3,rank)+1
          END IF
          DO i=1,np
            Arecv(i)=Arecv(i)*Ascale
          END DO
          mp_ncwrite=nf_put_vara_TYPE(ncid, varid, start, total, Arecv)
        END DO
!
!  Otherwise, send data to master node.
!
      ELSE
        np=(UB1-LB1+1)*(UB2-LB2+1)
        CALL mpi_isend (A(LB1:,LB2:), np, MP_FLOAT, MyMaster, MyRank+5, &
     &                  OCN_COMM_WORLD, request, MyError)
        CALL mpi_wait (request, status, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_ISEND', MyRank, MyError, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
      END IF
!
!  Deallocate receive buffer.
!
      IF (allocated(Arecv).and.(MyRank.eq.MyMaster)) THEN
        deallocate (Arecv)
      END IF

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 42)
# endif

 10   FORMAT (/,' MP_NCWRITE - error during ',a,' call, Node = ',       &
     &          i3.3,' Error = ',i3,/,13x,a)

      RETURN
      END FUNCTION mp_ncwrite

      SUBROUTINE mp_reduce (ng, model, Asize, A, op_handle)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine collects and reduces requested variables from all      !
!  nodes in the group.  Then,  it broadcasts reduced variables to      !
!  all nodes in the group.                                             !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Asize      Number of scalar variables to reduce.                 !
!     A          Vector of scalar variables to reduce.                 !
!     op_handle  Reduction operation handle (string).  The following   !
!                  reduction operations are supported:                 !
!                  'MIN', 'MAX', 'SUM'                                 !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Vector of reduced scalar variables.                   !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model, Asize

      character (len=*), intent(in) :: op_handle(Asize)

      real(r8), intent(inout) :: A(Asize)
!
!  Local variable declarations.
!
      integer :: handle, i, rank
      integer :: Lstr, MyError, request

      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Rrequest

      integer, dimension(MPI_STATUS_SIZE) :: Rstatus
      integer, dimension(MPI_STATUS_SIZE) :: Sstatus

      real(r8), dimension(Asize,0:NtileI(ng)*NtileJ(ng)-1) :: Arecv
      real(r8), dimension(Asize) :: Areduce
      real(r8), dimension(Asize) :: Asend

      character (len=90) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 41)
# endif
!
!-----------------------------------------------------------------------
!  Collect and reduce requested scalar variables.
!-----------------------------------------------------------------------
!
!  Pack data to reduce.
!
      DO i=1,Asize
        Asend(i)=A(i)
      END DO
!
!  Collect and reduce.
!
# if defined REDUCE_ALLREDUCE
      DO i=1,Asize
        IF (op_handle(i)(1:3).eq.'MIN') THEN
          handle=MPI_MIN
        ELSE IF (op_handle(i)(1:3).eq.'MAX') THEN
          handle=MPI_MAX
        ELSE IF (op_handle(i)(1:3).eq.'SUM') THEN
          handle=MPI_SUM
        END IF
        CALL mpi_allreduce (Asend(i), Areduce(i), 1, MP_FLOAT, handle,  &
     &                      OCN_COMM_WORLD, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_ALLREDUCE', MyRank, MyError,           &
     &                      string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
      END DO
# elif defined REDUCE_ALLGATHER
      CALL mpi_allgather (Asend, Asize, MP_FLOAT,                       &
     &                    Arecv, Asize, MP_FLOAT,                       &
     &                    OCN_COMM_WORLD, MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_ALLGATHER', MyRank, MyError,             &
     &                    string(1:Lstr)
        exit_flag=2
        RETURN
      END IF
      DO i=1,Asize
        Areduce(i)=Arecv(i,0)
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          IF (op_handle(i)(1:3).eq.'MIN') THEN
            Areduce(i)=MIN(Areduce(i),Arecv(i,rank))
          ELSE IF (op_handle(i)(1:3).eq.'MAX') THEN
            Areduce(i)=MAX(Areduce(i),Arecv(i,rank))
          ELSE IF (op_handle(i)(1:3).eq.'SUM') THEN
            Areduce(i)=Areduce(i)+Arecv(i,rank)
          END IF
        END DO
      END DO
# else
      IF (MyRank.eq.MyMaster) THEN
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          CALL mpi_irecv (Arecv(1,rank), Asize, MP_FLOAT, rank,         &
     &                    rank+500, OCN_COMM_WORLD, Rrequest(rank),     &
     &                    MyError)
        END DO
        DO i=1,Asize
          Areduce(i)=Asend(i)
        END DO
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          CALL mpi_wait (Rrequest(rank), Rstatus, MyError)
          IF (MyError.ne.MPI_SUCCESS) THEN
            CALL mpi_error_string (MyError, string, LEN(string),        &
     &                             MyError)
            Lstr=LEN_TRIM(string)
            WRITE (stdout,10) 'MPI_IRECV', rank, Rerror, string(1:Lstr)
            exit_flag=2
            RETURN
          END IF
          DO i=1,Asize
            IF (op_handle(i)(1:3).eq.'MIN') THEN
              Areduce(i)=MIN(Areduce(i),Arecv(i,rank))
            ELSE IF (op_handle(i)(1:3).eq.'MAX') THEN
              Areduce(i)=MAX(Areduce(i),Arecv(i,rank))
            ELSE IF (op_handle(i)(1:3).eq.'SUM') THEN
              Areduce(i)=Areduce(i)+Arecv(i,rank)
            END IF
          END DO
        END DO
      ELSE
        CALL mpi_isend (Asend, Asize, MP_FLOAT, MyMaster, MyRank+500,   &
     &                  OCN_COMM_WORLD, request, MyError)
        CALL mpi_wait (request, Sstatus, MyError)
        IF (Serror.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_ISEND', MyRank, Serror, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
      END IF
!
!  Broadcast reduced variables from process to all processes in the
!  group.
!
      CALL mpi_bcast (Areduce, Asize, MP_FLOAT, MyMaster,               &
     &                OCN_COMM_WORLD, MyError)
      IF (Serror.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
        exit_flag=2
        RETURN
      END IF
# endif
 10   FORMAT (/,' MP_REDUCE - error during ',a,' call, Node = ',        &
     &        i3.3,' Error = ',i3,/,13x,a)
!
!  Unpack.
!
      DO i=1,Asize
        A(i)=Areduce(i)
      END DO
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 41)
# endif

      RETURN
      END SUBROUTINE mp_reduce

      SUBROUTINE mp_scatter (ng, model, LBi, UBi, LBj, UBj, LBk, UBk,   &
     &                       Nghost, gtype, Amin, Amax,                 &
# if defined READ_WATER && defined MASKING
     &                       NWpts, IJ_water,                           &
# endif
     &                       Npts, A, Aout)
!
!=======================================================================
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  This routine broadcasts input global data, packed as 1D real array, !
!  to each spawned MPI node.  The output tile array (2D or 3D) has its !
!  ghost-points updated in the halo region.  It is used by the master  !
!  node to scatter input global data to each tiled node.               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     LBi        I-dimension Lower bound.                              !
!     UBi        I-dimension Upper bound.                              !
!     LBj        J-dimension Lower bound.                              !
!     UBj        J-dimension Upper bound.                              !
!     LBk        K-dimension Lower bound, if any. Otherwise, a value   !
!                  of one is expected.                                 !
!     LBk        K-dimension Upper bound, if any. Otherwise, a value   !
!                  of one is expected.                                 !
!     UBk        K-dimension Upper bound.                              !
!     Nghost     Number of ghost-points in the halo region.            !
!     gtype      C-grid type. If negative and Land-Sea mask is         !
!                  available, only water-points are processed.         !
!     Amin       Input array minimum value.                            !
!     Amax       Input array maximum value.                            !
!     NWpts      Number of water points.                               !
!     IJ_water   IJ-indices for water points.                          !
!     Npts       Number of points to processes in A.                   !
!     A          Input global data from each node packed into 1D array !
!                  in column-major order. That is, in the same way     !
!                  that Fortran multi-dimensional arrays are stored    !
!                  in memory.                                          !
!     Npts       Number of points to processes in A.                   !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     Aout       Tile array (2D or 3D) with updated ghost-points in    !
!                  the halo region.                                    !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_ncparam
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model
      integer, intent(in) :: LBi, UBi, LBj, UBj, LBk, UBk
      integer, intent(in) :: Nghost, gtype, Npts

# if defined READ_WATER && defined MASKING
      integer, intent(in) :: NWpts
      integer, intent(in) :: IJ_water(NWpts)
# endif
      real(r8), intent(inout) :: Amin, Amax
      real(r8), intent(inout) :: A(Npts+2)
      real(r8), intent(out) :: Aout(LBi:UBi,LBj:UBj,LBk:UBk)
!
!  Local variable declarations.
!
      integer :: Io, Ie, Jo, Je, Ioff, Joff, Koff
      integer :: Ilen, Jlen, Klen, IJlen
      integer :: Lstr, MyError, MySize, MyType, rank
      integer :: i, ic, ij, j, jc, k, kc, mc, nc

      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Imin, Imax
      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Jmin, Jmax
      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Itile, Jtile

      real(r8), dimension((Lm(ng)+2)*(Mm(ng)+2)*(UBk-LBk+1)) :: Arecv

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 43)
# endif
!
!-----------------------------------------------------------------------
!  Set horizontal starting and ending indices for parallel domain
!  partitions in the XI- and ETA-directions.
!-----------------------------------------------------------------------
!
!  Set first and last grid point according to staggered C-grid
!  classification. The, set 1D counter offsets.
!
      Io=0
      Ie=Lm(ng)+1
      Jo=0
      Je=Mm(ng)+1
      MyType=ABS(gtype)
      IF ((MyType.eq.p2dvar).or.(MyType.eq.u2dvar).or.                  &
     &    (MyType.eq.p3dvar).or.(MyType.eq.u3dvar)) Io=1
      IF ((MyType.eq.p2dvar).or.(MyType.eq.v2dvar).or.                  &
     &    (MyType.eq.p3dvar).or.(MyType.eq.v3dvar)) Jo=1
      IF (Io.eq.0) THEN
        Ioff=1
      ELSE
        Ioff=0
      END IF
      IF (Jo.eq.0) THEN
        Joff=0
      ELSE
        Joff=1
      END IF
      IF (LBk.eq.0) THEN
        Koff=0
      ELSE
        Koff=1
      END IF
      Ilen=Ie-Io+1
      Jlen=Je-Jo+1
      Klen=UBk-LBk+1
      IJlen=Ilen*Jlen
!
!  Set physical, non-overlaping (no ghost-points) ranges according to
!  tile rank.
!
      DO rank=0,NtileI(ng)*NtileJ(ng)-1
        CALL get_bounds (ng, rank, gtype, Nghost,                       &
     &                   Itile(rank), Jtile(rank),                      &
     &                   Imin(rank), Imax(rank),                        &
     &                   Jmin(rank), Jmax(rank))
      END DO
!
!  Size of broadcast buffer.
!
      IF (gtype.gt.0) THEN
        MySize=IJlen*Klen
      ELSE
        MySize=Npts
      END IF
!
!-----------------------------------------------------------------------
!  Scatter requested array data. 
!-----------------------------------------------------------------------
!
!  If master processor, append minimum and maximum values to the end of
!  the buffer. 
!
      IF (MyRank.eq.MyMaster) Then
        A(MySize+1)=Amin
        A(MySize+2)=Amax
      END IF
      MySize=MySize+2
!
!  Broadcast data to all processes in the group, itself included.
!
      CALL mpi_bcast (A, MySize, MP_FLOAT, MyMaster, OCN_COMM_WORLD,    &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
         Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
 10     FORMAT (/,' MP_SCATTER - error during ',a,' call, Node = ',     &
     &          i3.3, ' Error = ',i3,/,15x,a)
        exit_flag=2
        RETURN
      END IF
!
!  If water points only, fill land points.
!
      IF (gtype.gt.0) THEN
        DO nc=1,MySize-2
          Arecv(nc)=A(nc)
        END DO
# if defined READ_WATER && defined MASKING
      ELSE
        nc=0
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          ij=0
          mc=0
          DO j=Jo,Je
            jc=(j-Joff)*Ilen+kc
            DO i=Io,Ie
              ij=ij+1
              ic=i+Ioff+jc
              IF (IJ_water(mc+1).eq.ij) THEN
                mc=mc+1
                nc=nc+1
                Arecv(ic)=A(nc)
              ELSE
                Arecv(ic)=0.0_r8
              ENDIF
            END DO
          END DO
        END DO
# endif
      END IF
!
!  Unpack data buffer.
!
      DO k=LBk,UBk
        kc=(k-Koff)*IJlen
        DO j=Jmin(MyRank),Jmax(MyRank)
          jc=(j-Joff)*Ilen+kc
          DO i=Imin(MyRank),Imax(MyRank)
            ic=i+Ioff+jc
            Aout(i,j,k)=Arecv(ic)
          END DO
        END DO
      END DO
      Amin=A(MySize-1)
      Amax=A(MySize)
      
# if defined EW_PERIODIC || defined NS_PERIODIC
!
!-----------------------------------------------------------------------
!  Apply periodic boundary conditions.
!-----------------------------------------------------------------------
!
#  ifdef EW_PERIODIC
      IF (Itile(MyRank).eq.(NtileI(ng)-1)) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          DO j=Jmin(MyRank),Jmax(MyRank)
            ic=Ioff+(j-Joff)*Ilen+kc
            Aout(Lm(ng)+1,j,k)=Arecv(1+ic)
            Aout(Lm(ng)+2,j,k)=Arecv(2+ic)
#   ifdef THREE_GHOST
            Aout(Lm(ng)+3,j,k)=Arecv(3+ic)
#   endif
          END DO
        END DO
      END IF
      IF (Itile(MyRank).eq.0) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          DO j=Jmin(MyRank),Jmax(MyRank)
            ic=Ioff+(j-Joff)*Ilen+kc
            Aout(-2,j,k)=Arecv(Lm(ng)-2+ic)
            Aout(-1,j,k)=Arecv(Lm(ng)-1+ic)
            Aout( 0,j,k)=Arecv(Lm(ng)  +ic)
          END DO
        END DO
      END IF
#  endif
#  ifdef NS_PERIODIC
      IF (Jtile(MyRank).eq.(NtileJ(ng)-1)) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          DO i=Imin(MyRank),Imax(MyRank)
            Aout(i,Mm(ng)+1,k)=Arecv(i+Ioff+(1-Joff)*Ilen+kc)
            Aout(i,Mm(ng)+2,k)=Arecv(i+Ioff+(2-Joff)*Ilen+kc)
#   ifdef THREE_GHOST
            Aout(i,Mm(ng)+3,k)=Arecv(i+Ioff+(3-Joff)*Ilen+kc)
#   endif
          END DO
        END DO
      END IF
      IF (Jtile(MyRank).eq.0) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          DO i=Imin(MyRank),Imax(MyRank)
            Aout(i,-2,k)=Arecv(i+Ioff+(Mm(ng)-2-Joff)*Ilen+kc)
            Aout(i,-1,k)=Arecv(i+Ioff+(Mm(ng)-1-Joff)*Ilen+kc)
            Aout(i, 0,k)=Arecv(i+Ioff+(Mm(ng)  -Joff)*Ilen+kc)
          END DO
        END DO
      END IF
#  endif
#  if defined EW_PERIODIC && defined NS_PERIODIC
      IF ((Itile(MyRank).eq.(NtileI(ng)-1)).and.                        &
     &    (Jtile(MyRank).eq.(NtileJ(ng)-1))) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
#   ifdef THREE_GHOST
          DO j=1,3
#   else
          DO j=1,2
#   endif
            jc=Ioff+(j-Joff)*Ilen+kc
            Aout(Lm(ng)+1,Mm(ng)+j,k)=Arecv(1+jc)
            Aout(Lm(ng)+2,Mm(ng)+j,k)=Arecv(2+jc)
#   ifdef THREE_GHOST
            Aout(Lm(ng)+3,Mm(ng)+j,k)=Arecv(3+jc)
#   endif
          END DO
        END DO
      END IF
      IF ((Itile(MyRank).eq.0).and.                                     &
     &    (Jtile(MyRank).eq.(NtileJ(ng)-1))) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
#   ifdef THREE_GHOST
          DO j=1,3
#   else
          DO j=1,2
#   endif
            jc=Ioff+(j-Joff)*Ilen+kc
            Aout(-2,Mm(ng)+j,k)=Arecv(Lm(ng)-2+jc)
            Aout(-1,Mm(ng)+j,k)=Arecv(Lm(ng)-1+jc)
            Aout( 0,Mm(ng)+j,k)=Arecv(Lm(ng)  +jc)
          END DO
        END DO
      END IF
      IF ((Itile(MyRank).eq.(NtileI(ng)-1)).and.                        &
     &    (Jtile(MyRank).eq.0)) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          DO j=-2,0
            jc=Ioff+(Mm(ng)+j-Joff)*Ilen+kc
            Aout(Lm(ng)+1,j,k)=Arecv(1+jc)
            Aout(Lm(ng)+2,j,k)=Arecv(2+jc)
#   ifdef THREE_GHOST
            Aout(Lm(ng)+3,j,k)=Arecv(3+jc)
#   endif
          END DO
        END DO
      END IF
      IF ((Itile(MyRank).eq.0).and.                                     &
     &    (Jtile(MyRank).eq.0)) THEN
        DO k=LBk,UBk
          kc=(k-Koff)*IJlen
          DO j=-2,0
            jc=Ioff+(Mm(ng)+j-Joff)*Ilen+kc
            Aout(-2,j,k)=Arecv(Lm(ng)-2+jc)
            Aout(-1,j,k)=Arecv(Lm(ng)-1+jc)
            Aout( 0,j,k)=Arecv(Lm(ng)  +jc)
          END DO
        END DO
      END IF
#  endif
# endif
# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 43)
# endif

      RETURN
      END SUBROUTINE mp_scatter

      SUBROUTINE mp_scatter_state (ng, model, Nstr, Nend, Asize,        &
     &                             A, Aout)
!
!=======================================================================
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!================================================== Hernan G. Arango ===
!                                                                      !
!  This routine scatters (global to threaded) state data to all nodes  !
!  in the group. Before this can be done, the global data needs to be  !
!  collected from all the  nodes  by the master.  This is achieved by  !
!  summing the input values at each point.  This  routine  is used to  !
!  pack the state data for the GST analysis propagators.               !
!                                                                      !
!  On Input:                                                           !
!                                                                      !
!     ng         Nested grid number.                                   !
!     model      Calling model identifier.                             !
!     Nstr       Threaded array lower bound.                           !
!     Nend       Threaded array upper bound.                           !
!     Asize      Size of the .                                         !
!     A          Threaded 1D array process.                            !
!                                                                      !
!  On Output:                                                          !
!                                                                      !
!     A          Collected data from all nodes.                        !
!     Aout       Threaded block of data.                               !
!                                                                      !
!=======================================================================
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits 
      USE mod_ncparam
      USE mod_scalars
!
      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, model
      integer, intent(in) :: Nstr, Nend, Asize

      real(r8), intent(inout)  :: A(Asize)

      real(r8), intent(out) :: Aout(Nstr:Nend)
!
!  Local variable declarations.
!
      integer :: Lstr, MyError, request
      integer :: i, rank

      integer, dimension(0:NtileI(ng)*NtileJ(ng)-1) :: Rrequest

      integer, dimension(MPI_STATUS_SIZE) :: status

      real(r8), allocatable :: Arecv(:)

      character (len=80) :: string

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn on time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_on (ng, model, 42)
# endif
!
!-----------------------------------------------------------------------
!  Collect data blocks from all nodes and scatter the data to all nodes.
!-----------------------------------------------------------------------
!
!  All nodes have distinct pieces of the data and zero everywhere else.
!  So the strategy here is for the master node to receive the data from
!  the other nodes (excluding itself) and accumulate the sum at each
!  point. Then, the master node broadcast (itself included) its copy of
!  the accumlated data to other the nodes in the group. After this, each
!  node loads only the required block of the data into output array.
!
!  Notice that only the master node allocates the recieving buffer
!  (Arecv). It also receives only buffer at the time to avoid having
!  a very large communication array.  So here memory is more important
!  than time.
!
      IF (MyRank.eq.MyMaster) THEN
!
!  If master node, allocate and receive buffer.
!
        IF (.not.allocated(Arecv)) THEN
          allocate (Arecv(Asize))
        END IF
!
!  If master node, loop over other nodes to receive and accumulate the
!  data.
!
        DO rank=1,NtileI(ng)*NtileJ(ng)-1
          CALL mpi_irecv (Arecv, Asize, MP_FLOAT, rank, rank+5,         &
     &                    OCN_COMM_WORLD, Rrequest(rank), MyError)
          CALL mpi_wait (Rrequest(rank), status, MyError)
          IF (MyError.ne.MPI_SUCCESS) THEN
            CALL mpi_error_string (MyError, string, LEN(string),        &
     &                             MyError)
            Lstr=LEN_TRIM(string)
            WRITE (stdout,10) 'MPI_IRECV', rank, MyError, string(1:Lstr)
 10         FORMAT (/,' MP_SCATTER_STATE - error during ',a,            &
     &              ' call, Node = ', i3.3,' Error = ',i3,/,13x,a)
            exit_flag=2
            RETURN
          END IF
          DO i=1,Asize
            A(i)=A(i)+Arecv(i)
          END DO
        END DO
!
!  Otherwise, send data to master node.
!
      ELSE
        CALL mpi_isend (A, Asize, MP_FLOAT, MyMaster, MyRank+5,         &
     &                  OCN_COMM_WORLD, request, MyError)
        CALL mpi_wait (request, status, MyError)
        IF (MyError.ne.MPI_SUCCESS) THEN
          CALL mpi_error_string (MyError, string, LEN(string), MyError)
          Lstr=LEN_TRIM(string)
          WRITE (stdout,10) 'MPI_ISEND', MyRank, MyError, string(1:Lstr)
          exit_flag=2
          RETURN
        END IF
      END IF
!
!  Broadcast accumulated (full) data to all nodes.
!
      CALL mpi_bcast (A, Asize, MP_FLOAT, MyMaster, OCN_COMM_WORLD,     &
     &                MyError)
      IF (MyError.ne.MPI_SUCCESS) THEN
        CALL mpi_error_string (MyError, string, LEN(string), MyError)
        Lstr=LEN_TRIM(string)
        WRITE (stdout,10) 'MPI_BCAST', MyRank, MyError, string(1:Lstr)
        exit_flag=2
        RETURN
      END IF
!
!  Load appropriate data block into output array.
!
      DO i=Nstr,Nend
        Aout(i)=A(i)
      END DO

# ifdef PROFILE
!
!-----------------------------------------------------------------------
!  Turn off time clocks.
!-----------------------------------------------------------------------
!
      CALL wclock_off (ng, model, 42)
# endif

      RETURN
      END SUBROUTINE mp_scatter_state

      SUBROUTINE mp_dump (ng, tile, gtype,                              &
     &                    ILB, IUB, JLB, JUB, KLB, KUB, A, name)
!
!================================================== Hernan G. Arango ===
!  Copyright (c) 2005 ROMS/TOMS Group                                  !
!=======================================================================
!                                                                      !
!  This routine is used to debug distributed-memory communications.    !
!  It writes field into an ASCII file for further post-processing.     !
!                                                                      !
!=======================================================================
!

      USE mod_param
      USE mod_parallel
      USE mod_ncparam

      implicit none
!
!  Imported variable declarations.
!
      integer, intent(in) :: ng, tile, gtype
      integer, intent(in) :: ILB, IUB, JLB, JUB, KLB, KUB

      real(r8), intent(in) :: A(ILB:IUB,JLB:JUB,KLB:KUB)

      character (len=*) :: name
!
!  Local variable declarations.
!
      common /counter/ nc
      integer :: nc

      logical, save :: first = .TRUE.

      integer :: IstrR, IendR, JstrR, JendR, IstrU, JstrV
      integer :: Imin, Imax, Ioff, Jmin, Jmax, Joff
      integer :: unit

#include "tile.h"
#include "set_bounds.h"
!
!------------------------------------------------------------------------
!  Write out requested field.
!------------------------------------------------------------------------
!
      IF (first) THEN
        nc=0
        first=.FALSE.
      END IF
      nc=nc+1
      IF (Master) THEN
        WRITE (10,'(a,i3.3,a,a)') 'file ', nc, ': ', TRIM(name)
        CALL my_flush (10)
      END IF
!
!  Write out field including ghost-points.
!
      Imin=0
      Imax=Lm(ng)+1
#ifdef EW_PERIODIC
      Ioff=3
#else
      Ioff=1
#endif
      Jmin=0
      Jmax=Mm(ng)+1
#ifdef NS_PERIODIC
      Joff=3
#else
      Joff=1
#endif
      IF ((gtype.eq.p2dvar).or.(gtype.eq.p3dvar).or.                    &
     &    (gtype.eq.u2dvar).or.(gtype.eq.u3dvar)) THEN
        Imin=1
      END IF
      IF ((gtype.eq.p2dvar).or.(gtype.eq.p3dvar).or.                    &
     &    (gtype.eq.v2dvar).or.(gtype.eq.v3dvar)) THEN
        Jmin=1
      END IF

      unit=(MyRank+1)*1000+nc
      WRITE (unit,*) ILB, IUB, JLB, JUB, KLB, KUB,                      &
     &               Ioff, Joff, Imin, Imax, Jmin, Jmax,                &
     &               A(ILB:IUB,JLB:JUB,KLB:KUB)
      CALL my_flush (unit)
!
!  Write out non-overlapping field.
!
      Imin=IstrR
      Imax=IendR
#ifdef EW_PERIODIC
      Ioff=2
#else
      Ioff=1
#endif
      Jmin=JstrR
      Jmax=JendR
#ifdef NS_PERIODIC
      Joff=2
#else
      Joff=1
#endif
      IF ((gtype.eq.p2dvar).or.(gtype.eq.p3dvar).or.                    &
     &    (gtype.eq.u2dvar).or.(gtype.eq.u3dvar)) THEN
        Imin=Istr
        Ioff=Ioff-1
      END IF
      IF ((gtype.eq.p2dvar).or.(gtype.eq.p3dvar).or.                    &
     &    (gtype.eq.v2dvar).or.(gtype.eq.v3dvar)) THEN
        Jmin=Jstr
        Joff=Joff-1
      END IF

      unit=(MyRank+1)*10000+nc
      WRITE (unit,*) Imin, Imax, Jmin, Jmax, KLB, KUB,                  &
     &               Ioff, Joff, Imin, Imax, Jmin, Jmax,                &
     &               A(Imin:Imax,Jmin:Jmax,KLB:KUB)
      CALL my_flush (unit)

      RETURN
      END SUBROUTINE mp_dump
#else
      SUBROUTINE distribute
      END SUBROUTINE distribute
#endif
