#include "cppdefs.h"
      MODULE ocean_control_mod
!
!============================================== Emanuele Di Lorenzo  ===
!  Copyright (c) 2005 ROMS/TOMS Group                Andrew M. Moore   !
!================================================== Hernan G. Arango ===
!                                                                      !
!  ROMS/TOMS Weak Constraint 4-Dimensional Variational (4DVar)         !
!            Data Assimilation Driver: Indirect Representer Approach   !
!                                                                      !
!  This driver is used for weak constraint 4DVar where errors are      !
!  considered in both model and observations.                          !
!                                                                      !
!  The routines in this driver control the initialization,  time-      !
!  stepping, and finalization of  ROMS/TOMS  model following ESMF      !
!  conventions:                                                        !
!                                                                      !
!     initialize                                                       !
!     run                                                              !
!     finalize                                                         !
!                                                                      !
!=======================================================================
!
      implicit none

      PRIVATE
      PUBLIC  :: initialize, run, finalize

      CONTAINS

      SUBROUTINE initialize (first, MyCOMM)
!
!=======================================================================
!                                                                      !
!  This routine allocates and initializes ROMS/TOMS state variables    !
!  and internal and external parameters.                               !
!                                                                      !
!=======================================================================
!
!  Arguments:
!
!     first      On input, .TRUE. requests the one-time setup below.
!                It is reset to .FALSE. as soon as the setup starts,
!                so later calls only (re)set the communicator.
!     MyCOMM     Optional communicator. Only referenced when the
!                DISTRIBUTE option is active; MPI_COMM_WORLD is used
!                when it is absent.
!
!  Errors are reported through "exit_flag": the routine returns early
!  whenever it differs from "NoError".
!
      USE mod_param
      USE mod_parallel
      USE mod_fourdvar
      USE mod_iounits
      USE mod_scalars
!
#ifdef AIR_OCEAN 
      USE atm_coupler_mod, ONLY : initialize_coupling
#endif
!
!  Imported variable declarations.
!
      logical, intent(inout) :: first

      integer, intent(in), optional :: MyCOMM
!
!  Local variable declarations.
!
      logical :: allocate_vars = .TRUE.

      integer :: ng, thread

#ifdef DISTRIBUTE
!
!-----------------------------------------------------------------------
!  Set distributed-memory (MPI) world communicator.
!-----------------------------------------------------------------------
!
      IF (PRESENT(MyCOMM)) THEN
        OCN_COMM_WORLD=MyCOMM
      ELSE
        OCN_COMM_WORLD=MPI_COMM_WORLD
      END IF
#endif
!
!-----------------------------------------------------------------------
!  Everything below is one-time work for all nested/composed grids.
!  The logical switch "first" allows multiple calls to this routine
!  during ensemble configurations: nothing else is done when it is
!  already .FALSE.
!-----------------------------------------------------------------------
!
      IF (.not.first) RETURN
      first=.FALSE.
!
!  Initialize model internal parameters.
!
      CALL initialize_param
      CALL initialize_parallel
      CALL initialize_scalars
!
!  Initialize wall clocks, once per thread and grid.
!
      IF (Master) WRITE (stdout,10)
      DO ng=1,Ngrids
!$OMP PARALLEL DO PRIVATE(thread) SHARED(ng,numthreads)
        DO thread=0,numthreads-1
          CALL wclock_on (ng, iNLM, 0)
        END DO
!$OMP END PARALLEL DO
      END DO

#ifdef AIR_OCEAN 
!
!  Initialize coupling streams between atmosphere and ocean using the
!  Model Coupling Toolkit (MCT).
!
      CALL initialize_coupling (MyRank)
#endif
!
!  Read in model tunable parameters from standard input.  Only the
!  master node prints the error message, but every node returns.
!
      CALL inp_par (iNLM)
      IF (exit_flag.ne.NoError) THEN
        IF (Master) THEN
          WRITE (stdout,'(/,a,i3,/)') Rerror(exit_flag), exit_flag
        END IF
        RETURN
      END IF
!
!  Allocate and initialize modules variables.
!
      CALL mod_arrays (allocate_vars)
!
!  Allocate and initialize observation arrays.
!
      CALL initialize_fourdvar
!
!  Read in model-error standard deviation factors from STDname for
!  each grid.  With DISTRIBUTE, the error flag is broadcast before it
!  is tested so that all nodes take the same branch.
!
      DO ng=1,Ngrids
        CALL get_state (ng, 6, 6, STDname(ng), 1, 1)
#ifdef DISTRIBUTE
        CALL mp_bcasti (ng, iNLM, exit_flag, 1)
#endif
        IF (exit_flag.ne.NoError) RETURN
      END DO
!
 10   FORMAT (' Process Information:',/)

      RETURN
      END SUBROUTINE initialize

      SUBROUTINE run
!
!=======================================================================
!                                                                      !
!  This routine time-steps ROMS/TOMS nonlinear, tangent linear and     !
!  adjoint models.                                                     !
!                                                                      !
!=======================================================================
!
!  For every grid the driver performs, in order:
!
!    (1) nonlinear model (NLM) initialization and run, producing the
!        basic state trajectory X_n-1(t);
!    (2) a representer model (RPM) run giving the prior estimate at
!        the observation points, H * X_n;
!    (3) outer loops, each made of inner loops that integrate the
!        adjoint model (ADM) backward and the tangent linear model
!        (TLM) forward, calling "congrad" after every ADM/TLM pair;
!    (4) one final ADM sweep and RPM run per outer loop, which write
!        new NLM initial conditions and a new state trajectory.
!
!  The routine returns immediately whenever "exit_flag" differs from
!  "NoError".  Informational output is written by the master node
!  only.
!
!  OpenMP note: the grid index "ng" is SHARED in every parallel region
!  because it is set by the enclosing serial loop and only read inside
!  the region; a PRIVATE copy would be undefined on entry.
!
      USE mod_param
      USE mod_parallel
      USE mod_fourdvar
      USE mod_iounits
      USE mod_ncparam
      USE mod_netcdf
      USE mod_scalars
      USE mod_stepping
!
      USE ad_convolution_mod, ONLY : ad_convolution
      USE ad_variability_mod, ONLY : ad_variability
      USE impulse_mod, ONLY : impulse
      USE ini_adjust_mod, ONLY : ini_adjust
      USE ini_fields_mod, ONLY : ini_fields
      USE ini_adjust_mod, ONLY : load_ADtoTL
      USE ini_adjust_mod, ONLY : load_TLtoAD
      USE mod_ocean, ONLY : initialize_ocean
      USE normalization_mod, ONLY : normalization
      USE tl_convolution_mod, ONLY : tl_convolution
      USE tl_variability_mod, ONLY : tl_variability
!
!  Local variable declarations.
!
      logical :: add, converged
      integer :: Lbck, Lini, Nrec
      integer :: lstr, my_iic, ng, rec, status, subs, tile, thread
!
!=======================================================================
!  Run model for all nested grids, if any.
!=======================================================================
!
      NEST_LOOP : DO ng=1,Ngrids
!
!  Initialize relevant parameters.
!
        Lold(ng)=1          ! old minimization time index
        Lnew(ng)=2          ! new minimization time index
        Lini=1              ! NLM initial conditions record in INIname
        Lbck=2              ! background record in INIname
        Nrun=1
        Ipass=1
        outer=0
        inner=0
        ERstr=1
        ERend=Nouter
!
!-----------------------------------------------------------------------
!  Configure weak constraint 4DVAR algorithm: Indirect Representer
!  Approach.
!-----------------------------------------------------------------------
!
!  Initialize and set nonlinear model initial conditions.
!
        wrtNLmod(ng)=.TRUE.
        wrtRPmod(ng)=.FALSE.
        wrtTLmod(ng)=.FALSE.
        CALL initial (ng)
        IF (exit_flag.ne.NoError) THEN
          IF (Master) THEN
            WRITE (stdout,10) Rerror(exit_flag), exit_flag
          END IF
          RETURN
        END IF
!
!  Save nonlinear initial conditions (currently in time index 1,
!  background) into record "Lini" of INIname NetCDF file. The record
!  "Lbck" becomes the background state record and the record "Lini"
!  becomes current nonlinear initial conditions.
!
        IF (LcycleINI(ng)) THEN
          tINIindx(ng)=1
          NrecINI(ng)=1
        END IF
        CALL wrt_ini (ng, 1)
#ifdef DISTRIBUTE
        CALL mp_bcasti (ng, iNLM, exit_flag, 1)
#endif
!
!  Stop here if the initial conditions could not be written, as it is
!  done after the second "wrt_ini" call further below.
!
        IF (exit_flag.ne.NoError) RETURN
!
!  Set nonlinear output history file as the initial basic state
!  trajectory.  The last three characters of FWDbase (presumably the
!  ".nc" extension) are replaced by "_<outer>.nc".
!
        LdefHIS(ng)=.TRUE.
        LwrtHIS(ng)=.TRUE.
        lstr=LEN_TRIM(FWDbase(ng))
        WRITE (HISname(ng),20) FWDbase(ng)(1:lstr-3), outer
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Model-error covariance normalization and standard deviation factors.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
!  Compute or read in the model-error correlation normalization factors.
!  If computing, write out factors to NetCDF. This is an expensive
!  computation that needs to be computed only once for a particular
!  application grid and decorrelation scales.
!  
        IF (LwrtNRM(ng)) THEN
          CALL def_norm (ng)
!$OMP PARALLEL DO PRIVATE(thread,subs,tile) SHARED(ng,numthreads)
          DO thread=0,numthreads-1
            subs=NtileX(ng)*NtileE(ng)/numthreads
            DO tile=subs*thread,subs*(thread+1)-1
              CALL normalization (ng, TILE, 2)
            END DO
          END DO
!$OMP END PARALLEL DO
          LdefNRM(ng)=.FALSE.
          LwrtNRM(ng)=.FALSE.
        ELSE
          tNRMindx(ng)=1
          CALL get_state (ng, 5, 5, NRMname(ng), tNRMindx(ng), 1)
#ifdef DISTRIBUTE
          CALL mp_bcasti (ng, iNLM, exit_flag, 1)
#endif
          IF (exit_flag.ne.NoError) RETURN
        END IF
!
!  Define TLM/RPM impulse forcing NetCDF file
!
        LdefTLF(ng)=.TRUE.
        CALL def_impulse (ng)
!
!  Define output 4DVAR NetCDF file containing all processed data
!  at observation locations.
!
        LdefMOD(ng)=.TRUE.
        CALL def_mod (ng)
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Run nonlinear model and compute basic state trajectory, X_n-1(t).
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
        IF (Master) THEN
          WRITE (stdout,30) 'NL', ntstart, ntend
        END IF

        time(ng)=time(ng)-dt(ng)

        NL_LOOP : DO my_iic=ntstart,ntend+1

          iic(ng)=my_iic
#ifdef SOLVE3D
          CALL main3d (ng)
#else
          CALL main2d (ng)
#endif
          IF (exit_flag.ne.NoError) THEN
            IF (Master) THEN
              WRITE (stdout,10) Rerror(exit_flag), exit_flag
            END IF  
            RETURN
          END IF

        END DO NL_LOOP
        wrtNLmod(ng)=.FALSE.
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Run representer model and compute a "prior estimate" state
!  trajectory, X_n(t). Use nonlinear state trajectory (X_n-1) as
!  basic state.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
!  Set representer model basic state trajectory file to nonlinear file.
!  Activate switch to write the representer model at observation points.
!
        FWDname(ng)=HISname(ng)
        ncFWDid(ng)=ncHISid(ng)
        wrtRPmod(ng)=.TRUE.
!
!  Initialize representer model with the background or reference state.
!
        CALL rp_initial (ng)
        IF (exit_flag.ne.NoError) THEN
          IF (Master) THEN
            WRITE (stdout,10) Rerror(exit_flag), exit_flag
          END IF
          RETURN
        END IF
!
!  Run representer model using the nonlinear trajectory as a basic
!  state.  Compute model solution at observation points, H * X_n.
!
        IF (Master) THEN
          WRITE (stdout,30) 'RP', ntstart, ntend
        END IF

        time(ng)=time(ng)-dt(ng)

        RP_LOOP1 : DO my_iic=ntstart,ntend+1

          iic(ng)=my_iic
#ifdef SOLVE3D
          CALL rp_main3d (ng)
#else
          CALL rp_main2d (ng)
#endif
          IF (exit_flag.ne.NoError) THEN
            IF (Master) THEN
              WRITE (stdout,10) Rerror(exit_flag), exit_flag
            END IF
            RETURN
          END IF

        END DO RP_LOOP1
        wrtRPmod(ng)=.FALSE.
!
!-----------------------------------------------------------------------
!  Solve the system:
!
!              (R_n + Cobs) * Beta_n = h_n
!
!              h_n = Xo - H * X_n
!
!  where R_n is the representer matrix, Cobs is the observation-error
!  covariance, Beta_n are the representer coefficients, h_n is the
!  misfit between observations (Xo) and model (H * X_n), and H is
!  the linearized observation operator. Here, _n denotes a sequence
!  of estimates.
!
!  The system does not need to be solved explicitly by inverting the
!  symmetric stabilized representer matrix, P_n:
!
!              P_n = R_n + Cobs
!
!  but by computing the action of P_n on any vector PSI, such that
!
!              P_n * PSI = R_n * PSI + Cobs * PSI
!
!  The representer matrix is not explicitly computed but evaluated by
!  one integration backward of the adjoint model and one integration
!  forward of the tangent linear model for any forcing vector PSI.
!
!  A preconditioned conjugate gradient algorithm is used to compute
!  an approximation PSI for Beta_n.
!
!-----------------------------------------------------------------------
!
        OUTER_LOOP : DO outer=1,Nouter
!
!  Set approximation vector PSI to representer coefficients Beta_n.
!  Here, PSI is set to misfit between observations and model, h_n.
!
          CALL congrad (ng, outer, 0, converged)
!
!  Set basic state trajectory.
!
          lstr=LEN_TRIM(FWDbase(ng))
          WRITE (FWDname(ng),20) FWDbase(ng)(1:lstr-3), outer-1

          INNER_LOOP : DO inner=1,Ninner
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Integrate adjoint model forced with any vector PSI at the observation
!  locations and generate adjoint trajectory, Lambda_n(t).
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
!  Initialize the adjoint model from rest.
!
            CALL ad_initial (ng)
            IF (exit_flag.ne.NoError) THEN
              IF (Master) THEN
                WRITE (stdout,10) Rerror(exit_flag), exit_flag
              END IF
              RETURN
            END IF
!
!  Set adjoint history NetCDF parameters.  Define adjoint history
!  file only once to avoid opening too many files.
!
            IF (Nrun.gt.1) LdefADJ(ng)=.FALSE.
            NrecADJ(ng)=0
            tADJindx(ng)=0
!
!  Time-step adjoint model backwards forced with current PSI vector.
!
            IF (Master) THEN
              WRITE (stdout,30) 'AD', ntstart, ntend
            END IF

            time(ng)=time(ng)+dt(ng)

            AD_LOOP1 : DO my_iic=ntstart,ntend,-1

              iic(ng)=my_iic
#ifdef SOLVE3D
              CALL ad_main3d (ng)
#else
              CALL ad_main2d (ng)
#endif
              IF (exit_flag.ne.NoError) THEN
                IF (Master) THEN
                  WRITE (stdout,10) Rerror(exit_flag), exit_flag
                END IF
                RETURN
              END IF

            END DO AD_LOOP1

#ifdef CONVOLVE
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Convolve adjoint trajectory with model-error covariance and convert
!  to impulse forcing.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
!  The covariance is applied exactly as in the final sweep of each
!  outer loop (adjoint half first, tangent linear half second), so
!  that the operator used while iterating on PSI is the same one used
!  with the final Beta_n.
!
            Nrec=NrecADJ(ng)
            NrecADJ(ng)=0
            tADJindx(ng)=0
            LwrtState2d(ng)=.TRUE.
            IF (Master) WRITE (stdout,40) outer, inner
!
            DO rec=1,Nrec
!
!  Read adjoint solution.
!
              CALL get_state (ng, iADM, 4, ADJname(ng), rec, Lold(ng))
!
!  First, multiply adjoint solution by the background-error standard
!  deviations.  Second, convolve resulting adjoint solution with the
!  adjoint diffusion operator which embeds background-error spatial
!  correlations. Notice that the spatial convolution is only done
!  for half of the diffusion steps.
!
!$OMP PARALLEL DO PRIVATE(thread,subs,tile)                             &
!$OMP&            SHARED(ng,inner,numthreads)
              DO thread=0,numthreads-1
                subs=NtileX(ng)*NtileE(ng)/numthreads
                DO tile=subs*thread,subs*(thread+1)-1
                  CALL ad_variability (ng, TILE, Lold(ng))
                  CALL ad_convolution (ng, TILE, Lold(ng), 2)
                END DO
              END DO
!$OMP END PARALLEL DO
!
!  To insure symmetry, convolve resulting filtered adjoint solution
!  from above with the tangent linear diffusion operator for the
!  other half of steps. Then, multiply result with its corresponding
!  background-error standard deviations.  The result is loaded back
!  into the adjoint state so it can be written below.
!
              add=.FALSE.
!$OMP PARALLEL DO PRIVATE(thread,subs,tile)                             &
!$OMP&            SHARED(ng,inner,add,numthreads)
              DO thread=0,numthreads-1
                subs=NtileX(ng)*NtileE(ng)/numthreads
                DO tile=subs*thread,subs*(thread+1)-1,+1
                  CALL load_ADtoTL (ng, TILE, Lold(ng), Lold(ng), add)
                  CALL tl_convolution (ng, TILE, Lold(ng), 2)
                  CALL tl_variability (ng, TILE, Lold(ng))
                  CALL load_TLtoAD (ng, TILE, Lold(ng), Lold(ng), add)
                END DO
              END DO
!$OMP END PARALLEL DO
!
!  Overwrite ADJname history NetCDF file with convolved adjoint
!  solution.
!
              kstp(ng)=Lold(ng)
# ifdef SOLVE3D
              nstp(ng)=Lold(ng)
# endif
              CALL ad_wrt_his (ng)
              IF (exit_flag.ne.NoError) RETURN
            END DO
            LwrtState2d(ng)=.FALSE.
#endif
!
!  Convert convolved adjoint solution to impulse forcing. Write out
!  impulse forcing into TLFname NetCDF file. To facilitate the forcing
!  by the TLM and RPM, the forcing is processed and written in
!  increasing time coordinates.
!
            IF (Master) WRITE (stdout,50) outer, inner
            tTLFindx(ng)=0
            CALL impulse (ng, iADM, ADJname(ng))
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Integrate tangent linear model forced by the convolved adjoint
!  trajectory (impulse forcing) to compute R_n * PSI at observation
!  points.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
!  Initialize tangent linear model from rest. The initial contribution 
!  from the adjoint model will be added as impulse forcing in
!  "tl_forcing".
! 
            wrtNLmod(ng)=.FALSE.
            wrtTLmod(ng)=.TRUE.
            CALL tl_initial (ng)
            IF (exit_flag.ne.NoError) THEN
              IF (Master) THEN
                WRITE (stdout,10) Rerror(exit_flag), exit_flag
              END IF
              RETURN
            END IF
!
!  Set tangent linear history NetCDF parameters.  Define tangent linear
!  history file at the beginning of each inner loop to avoid opening
!  too many NetCDF files.
!
            IF (inner.gt.1) LdefTLM(ng)=.FALSE.
            NrecTLM(ng)=0
            tTLMindx(ng)=0
!
!  Run tangent linear model forward and force with convolved adjoint
!  trajectory impulses. Compute R_n * PSI at observation points which
!  are used in the conjugate gradient algorithm.
!
            IF (Master) THEN
              WRITE (stdout,30) 'TL', ntstart, ntend
            END IF

            time(ng)=time(ng)-dt(ng)

            TL_LOOP : DO my_iic=ntstart,ntend+1

              iic(ng)=my_iic
#ifdef SOLVE3D
              CALL tl_main3d (ng)
#else
              CALL tl_main2d (ng)
#endif
              IF (exit_flag.ne.NoError) THEN
                IF (Master) THEN
                  WRITE (stdout,10) Rerror(exit_flag), exit_flag
                END IF
                RETURN
              END IF

            END DO TL_LOOP
            wrtNLmod(ng)=.FALSE.
            wrtTLmod(ng)=.FALSE.
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Use conjugate gradient algorithm to find a better approximation
!  PSI to representer coefficients Beta_n. Exit inner loop if
!  convergence is achieved.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
            Nrun=Nrun+1
            CALL congrad (ng, outer, inner, converged)
            IF (converged) EXIT INNER_LOOP

          END DO INNER_LOOP
!
!  Close tangent linear NetCDF file.  The returned status is not
!  examined; the file ID is reset to -1 regardless.
!
          status=nf_close(ncTLMid(ng))
          ncTLMid(ng)=-1
!
!-----------------------------------------------------------------------
!  Once that the representer coefficients, Beta_n, have been
!  approximated with sufficient accuracy, compute estimates of
!  Lambda_n and Xhat_n by carrying out one backward integration
!  of the adjoint model and one forward integration of the representer
!  model.
!-----------------------------------------------------------------------
!
!  Initialize the adjoint model always from rest.
!
          CALL ad_initial (ng)
          IF (exit_flag.ne.NoError) THEN
            IF (Master) THEN
              WRITE (stdout,10) Rerror(exit_flag), exit_flag
            END IF
            RETURN
          END IF
!
!  Set adjoint history NetCDF parameters.  Define adjoint history
!  file only once to avoid opening too many files.
!
          IF (Nrun.gt.1) LdefADJ(ng)=.FALSE.
          NrecADJ(ng)=0
          tADJindx(ng)=0
!
!  Time-step adjoint model backwards forced with estimated representer
!  coefficients, Beta_n.
!
          IF (Master) THEN
            WRITE (stdout,30) 'AD', ntstart, ntend
          END IF

          time(ng)=time(ng)+dt(ng)

          AD_LOOP2 : DO my_iic=ntstart,ntend,-1

            iic(ng)=my_iic
#ifdef SOLVE3D
            CALL ad_main3d (ng)
#else
            CALL ad_main2d (ng)
#endif
            IF (exit_flag.ne.NoError) THEN
              IF (Master) THEN
                WRITE (stdout,10) Rerror(exit_flag), exit_flag
              END IF
              RETURN
            END IF

          END DO AD_LOOP2

#ifdef CONVOLVE
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Convolve adjoint trajectory with model-error covariance and convert
!  to impulse forcing.
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
          Nrec=NrecADJ(ng)
          NrecADJ(ng)=0
          tADJindx(ng)=0
          LwrtState2d(ng)=.TRUE.
          IF (Master) WRITE (stdout,40) outer, 0
!
          DO rec=1,Nrec
!
!  Read adjoint solution.
!
            CALL get_state (ng, iADM, 4, ADJname(ng), rec, Lold(ng))
!
!  First, multiply adjoint solution by the background-error standard
!  deviations.  Second, convolve resulting adjoint solution with the
!  adjoint diffusion operator which embeds background-error spatial
!  correlations. Notice that the spatial convolution is only done
!  for half of the diffusion steps.
!
!$OMP PARALLEL DO PRIVATE(thread,subs,tile)                             &
!$OMP&            SHARED(ng,inner,numthreads)
            DO thread=0,numthreads-1
              subs=NtileX(ng)*NtileE(ng)/numthreads
              DO tile=subs*thread,subs*(thread+1)-1
                CALL ad_variability (ng, TILE, Lold(ng))
                CALL ad_convolution (ng, TILE, Lold(ng), 2)
              END DO
            END DO
!$OMP END PARALLEL DO
!
!  To insure symmetry, convolve resulting filtered adjoint solution
!  from above with the tangent linear diffusion operator for the
!  other half of steps. Then, multiply result with its corresponding
!  background-error standard deviations.
!
            add=.FALSE.
!$OMP PARALLEL DO PRIVATE(thread,subs,tile)                             &
!$OMP&            SHARED(ng,inner,add,numthreads)
            DO thread=0,numthreads-1
              subs=NtileX(ng)*NtileE(ng)/numthreads
              DO tile=subs*thread,subs*(thread+1)-1,+1
                CALL load_ADtoTL (ng, TILE, Lold(ng), Lold(ng), add)
                CALL tl_convolution (ng, TILE, Lold(ng), 2)
                CALL tl_variability (ng, TILE, Lold(ng))
                CALL load_TLtoAD (ng, TILE, Lold(ng), Lold(ng), add)
              END DO
            END DO
!$OMP END PARALLEL DO
!
!  Overwrite ADJname history NetCDF file with convolved adjoint
!  solution.
!
            kstp(ng)=Lold(ng)
# ifdef SOLVE3D
            nstp(ng)=Lold(ng)
# endif
            CALL ad_wrt_his (ng)
            IF (exit_flag.ne.NoError) RETURN
          END DO
          LwrtState2d(ng)=.FALSE.
#endif
!
!  Compute new nonlinear model initial conditions by adding convolved
!  adjoint solution (Beta(t=0), currently in adjoint state Lold index)
!  to the background state.
!
          CALL get_state (ng, iNLM, 1, INIname(ng), Lbck, Lini)
!$OMP PARALLEL DO PRIVATE(thread,subs,tile) SHARED(ng,numthreads)
          DO thread=0,numthreads-1
            subs=NtileX(ng)*NtileE(ng)/numthreads
            DO tile=subs*thread,subs*(thread+1)-1
              CALL ini_adjust (ng, TILE, Lold(ng), Lini)
              CALL ini_fields (ng, TILE, iNLM)
            END DO
          END DO
!$OMP END PARALLEL DO
!
!  Write out new nonlinear model initial conditions.
!
          IF (LcycleINI(ng)) THEN
            tINIindx(ng)=0
            NrecINI(ng)=1
          END IF
          CALL wrt_ini (ng, Lini)
#ifdef DISTRIBUTE
          CALL mp_bcasti (ng, iNLM, exit_flag, 1)
#endif
          IF (exit_flag.ne.NoError) RETURN
!
!  Convert convolved adjoint solution to impulse forcing. Write out
!  impulse forcing into TLFname NetCDF file. To facilitate the forcing
!  by the TLM and RPM, the forcing is processed and written in
!  increasing time coordinates.
!
          IF (Master) WRITE (stdout,50) outer, 0
          tTLFindx(ng)=0
          CALL impulse (ng, iADM, ADJname(ng))
!
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!  Run representer model and compute a "new estimate" of the state
!  trajectory, X_n(t).
!:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
!
!  Set previous and new basic state trajectory.
!
          LdefTLM(ng)=.TRUE.
          LwrtTLM(ng)=.TRUE.
          lstr=LEN_TRIM(FWDbase(ng))
          WRITE (TLMname(ng),20) FWDbase(ng)(1:lstr-3), outer
          WRITE (FWDname(ng),20) FWDbase(ng)(1:lstr-3), outer-1
!
!  Initialize representer model with background or reference state. The
!  initial contribution from the adjoint model, Beta_n(0), will be added
!  as impulse forcing in "tl_forcing".
!
          wrtNLmod(ng)=.FALSE.
          wrtTLmod(ng)=.TRUE.
          CALL rp_initial (ng)
          IF (exit_flag.ne.NoError) THEN
            IF (Master) THEN
              WRITE (stdout,10) Rerror(exit_flag), exit_flag
            END IF
            RETURN
          END IF
!
!  Run representer model using previous linearized trajectory, X_n-1, as
!  basic state and forced with convolved adjoint trajectory impulses.
!
          IF (Master) THEN
            WRITE (stdout,30) 'RP', ntstart, ntend
          END IF

          time(ng)=time(ng)-dt(ng)

          RP_LOOP2 : DO my_iic=ntstart,ntend+1

            iic(ng)=my_iic
#ifdef SOLVE3D
            CALL rp_main3d (ng)
#else
            CALL rp_main2d (ng)
#endif
            IF (exit_flag.ne.NoError) THEN
              IF (Master) THEN
                WRITE (stdout,10) Rerror(exit_flag), exit_flag
              END IF
              RETURN
            END IF

          END DO RP_LOOP2
          wrtNLmod(ng)=.FALSE.
          wrtTLmod(ng)=.FALSE.
!
!  Set basic state trajectory file to representer model file.  Close
!  current forward NetCDF file.
!
!  NOTE(review): FWDname is assigned TLMbase here, while the file just
!  written is TLMname; FWDname is rebuilt from FWDbase at the top of
!  the next outer loop anyway -- confirm which name is intended.
!
          FWDname(ng)=TLMbase(ng)
          status=nf_close(ncFWDid(ng))
          ncFWDid(ng)=-1

        END DO OUTER_LOOP

      END DO NEST_LOOP
!
 10   FORMAT (/,a,i3,/)
 20   FORMAT (a,'_',i2.2,'.nc')
 30   FORMAT (/,1x,a,1x,'ROMS/TOMS: started time-stepping:',            &
     &        '( TimeSteps: ',i8.8,' - ',i8.8,')',/)
 40   FORMAT (/,' Convolving Adjoint Tracjectory: Outer = ',i3.3,       &
     &          ' Inner = ',i3.3)
 50   FORMAT (/,' Converting Convolved Adjoint Tracjectory to',         &
     &          ' Impulses: Outer = ',i3.3,' Inner = ',i3.3,/)

      RETURN
      END SUBROUTINE run

      SUBROUTINE finalize
!
!=======================================================================
!                                                                      !
!  This routine terminates ROMS/TOMS nonlinear, tangent linear, and    !
!  adjoint models execution.                                           !
!                                                                      !
!=======================================================================
!
!  Steps:
!
!    (1) For each grid with restart output enabled, if "exit_flag" is
!        1 (treated here as a blow-up, see message below), save the
!        latest state with "wrt_rst".
!    (2) Stop the wall clocks started in "initialize".
!    (3) Close all IO files.
!
      USE mod_param
      USE mod_parallel
      USE mod_iounits
      USE mod_ncparam
      USE mod_scalars
!
!  Local variable declarations.
!
      integer :: ng, thread
!
!-----------------------------------------------------------------------
!  If blowing-up, save latest model state into RESTART NetCDF file.
!-----------------------------------------------------------------------
!
!  If cycling restart records, write solution into record 3.  This is
!  requested by setting the restart time index to 2 and switching the
!  cycling off (presumably "wrt_rst" advances the index by one before
!  writing -- confirm in "wrt_rst").
!
      DO ng=1,Ngrids
        IF (LwrtRST(ng).and.(exit_flag.eq.1)) THEN
          IF (Master) WRITE (stdout,10)
 10       FORMAT (/,' Blowing-up: Saving latest model state into ',     &
     &              ' RESTART file',/)
          IF (LcycleRST(ng).and.(NrecRST(ng).ge.2)) THEN
            tRSTindx(ng)=2
            LcycleRST(ng)=.FALSE.
          END IF
!
!  Remember the blow-up code and clear the error flag before calling
!  the restart writer.
!
          blowup=exit_flag
          exit_flag=NoError
          CALL wrt_rst (ng)
        END IF
      END DO
!
!-----------------------------------------------------------------------
!  Stop model and time profiling clocks.  Close output NetCDF files.
!-----------------------------------------------------------------------
!
!  Stop time clocks.
!
      IF (Master) THEN
        WRITE (stdout,20)
 20     FORMAT (/,'Elapsed CPU time (seconds):',/)
      END IF
!
!  The grid index "ng" is SHARED: it is set by the enclosing serial
!  loop and only read inside the parallel region.  A PRIVATE copy
!  would be undefined when passed to "wclock_off".
!
      DO ng=1,Ngrids
!$OMP PARALLEL DO PRIVATE(thread) SHARED(ng,numthreads)
        DO thread=0,numthreads-1
          CALL wclock_off (ng, iNLM, 0)
        END DO
!$OMP END PARALLEL DO
      END DO
!
!  Close IO files.
!
      CALL close_io

      RETURN
      END SUBROUTINE finalize

      END MODULE ocean_control_mod
