module newton_mod

   use dual_problem_mod
   use dwr_mod
   use element_mod
   use elemental_mod
   use estimates
   use euler_problem
   use matrix_oper
   use matrix_oper_int
   use main_data
   use mesh_mod
   use paramets
   use solution_mod
   use terms_mod

implicit none

   public :: EstimateNewtonADwr
   public :: EstimateNewtonADwrFixed
   public :: EstimateNewtonARes
   public :: EstimateNewtonNLDwr
   public :: localNewtonEstimate
   public :: Newton_output
   public :: NewtonSolve
   public :: performOneNewtonStdgmStep
   public :: setAndersonInNewton
   public :: setDampingInNewton
   !public :: setDampingInNewtonLocal
   public :: setDampingInNewton_aDWR


contains


   !> Seek a damping parameter lambda for the current Newton update.
   !>
   !> Starting from lambda = min(newton%lambda1, 1), the increment stored in
   !> newton%x is added to elem%wST scaled by lambda, the space-time terms are
   !> re-assembled and the preconditioned norm of the new right-hand side
   !> (newton%res0) is compared with newton%res.  While theta = res0/res >= 1,
   !> lambda is multiplied by `ratio` and wST is corrected by the difference
   !> (lambda - lambda_old).  The search stops when the residual decreases,
   !> when lambda <= ratio**(max_l-1), or in the iteration l == max_l.
   !> This should help the convergence of Newton when the approximation is
   !> still rough.
   !>
   !> If DWR is present and DWR%BI is set, the dual update newton%xD is finally
   !> added to the element-wise dual solution via CopyZST_fromLongVector.
   !>
   !> \param newton          Newton data; lambda, lambda_old, lambda1, res0,
   !>                        theta, newton_count and b1 are modified
   !> \param grid            mesh whose elements carry wST and vec(res_vec,:)
   !> \param deg_plus        passed on to ComputeST_Terms
   !> \param eta             passed on to FillVectorST
   !> \param loc_implicitly  controls the extra Newton_output on unit 54
   !> \param res_max_val     threshold (times 0.5) for the diagnostic dump of
   !>                        elements on unit 53 when the linear solver failed
   !> \param max_l           max number of damping iterations
   !> \param l               number of damping iterations performed
   !> \param ratio           reduction factor applied to lambda; it is only read
   !>                        here, hence intent(in)
   !> \param DWR             optional DWR data, only DWR%BI is inspected
   subroutine setDampingInNewton( newton, grid, deg_plus, eta, loc_implicitly, &
                                  res_max_val, max_l, l, ratio, DWR)
      class( Newton_t ), intent ( inout ) :: newton
      class( mesh ), intent(inout) :: grid
      logical, intent(in) :: deg_plus
      real, intent(in) :: eta
      logical, intent(in) :: loc_implicitly
      real, intent(in) :: res_max_val
      integer, intent(in) :: max_l ! max number of damping iterations
      integer, intent(out) :: l ! number of damping iterations
      real, intent(in) :: ratio ! reduction factor of the damping parameter
      type( DWR_t), intent(inout), optional :: DWR
      class(element), pointer :: elem
      integer :: kk, elemDof, i, k, j, p_mod, q_mod, nsize
      real :: lambda_exit, elemResNorm
      logical :: plus, BiCG

      ! the use of BiCG method for DWR
      BiCG = .false.
      if(present(DWR) ) then
         if( DWR%BI) BiCG = .true.
      endif

      ! smallest admissible damping factor
      lambda_exit = ratio**(max_l - 1)

      ! initialization of lambda (= damping factor)
      newton%lambda = min( newton%lambda1, 1.0)

      damping: do l = 1, max_l   ! iterations, seeking the optimal damping factor

         ! Update of the solution
         kk = 0
         do i = 1, grid%nelem
            elem => grid%elem(i)

            ! the element block has to start right after the previous one
            if (elem%ncv /= kk + 1) then
               print*, 'Problem while copying update to wST (compute.f90)'
               stop
            endif

            elemDof = elem%dof * ndim * elem%Tdof
            elem%vec( res_vec, 1:elemDof) =  newton%rr(kk+1:kk+ elemDof)

            ! only the change of lambda w.r.t. the previous trial is applied
            do k = 1, elem%Tdof
               do j = 1, ndim
                  elem%wST(j,1:elem%dof,k) = elem%wST(j,1:elem%dof,k) &
                       + (newton%lambda - newton%lambda_old) &
                       * newton%x(kk + 1 : kk + elem%dof)

                  kk = kk + elem%dof
               enddo !j
            enddo !k

            ! diagnostic output of elements with a large residual
            if(state%linSolver%lin_solver_not_conv > 0) then
               elemResNorm = sqrt( dot_product(elem%vec( res_vec, 1:elemDof), &
                    elem%vec( res_vec, 1:elemDof)))
               if(elemResNorm > 0.5 * res_max_val ) then
                  write(53,*) elem%xc(:), elem%i, elemResNorm, &
                       res_max_val, elem%wST(1:ndim,1:elem%dof,:), &
                       '  state%linSolver%lin_solver_not_conv te3tg73'
               endif
            endif

         enddo !i

         ! wST was updated - residuum needs to be recomputed
         call state%state_of_terms%new_w_was_computed()

         if(state%linSolver%lin_solver_not_conv > 0 ) close(53)

         newton%lambda_old = newton%lambda

         ! re-assemble the terms for the updated wST and store the RHS in b1
         call ComputeST_Terms( deg_plus )

         call FillVectorST( newton%b1(1:state%nsize), eta )

         newton%res0 = VectorPrecondNorm( newton%b1 )

         ! ratio of the new and the old residual, guarded against division by zero
         newton%theta  =  newton%res0  / max(1E-15, newton%res)

         if(newton%iter > 1) call Newton_output(grid, 55, l, loc_implicitly )

         if(loc_implicitly) call Newton_output( grid, 54, l, loc_implicitly )

         newton%newton_count = newton%newton_count + 1

         newton%lambda1 = newton%lambda/ratio/ratio   ! used for the new Newton iteration

         ! too small lambda, NOT CONVERGE
         if(newton%lambda <= lambda_exit ) exit damping

         if(l < max_l .and. newton%theta >= 1  ) then
            ! residuum is not decreasing, reduce lambda
            newton%lambda = ratio * newton%lambda
         else
            ! residuum decreases (or the last iteration was reached), quit iterations
            exit damping
         endif

      enddo damping

      ! update of the dual solution computed by BiCG
      if( BiCG ) then

         p_mod = state%getP_mod()
         q_mod = state%getQ_mod()

         nsize = state%bigNSize( p_mod, q_mod )
         if( nsize /= size( newton%xD, 1 ) ) then
            print*,'Troubles in arrays sizes in Newton.f90 73y7u3',  nsize, size( newton%xD,1 )
            stop
         endif

         if( p_mod > 0) then
            print*,'state%getP_mod() > 0 = ', state%getP_mod()
            print*,'this subroutine has to be re-written, also the elem%wST'
            print*, 'setting of plus below'
            stop
         endif

         ! false - use zST, true - use zST_plus
         plus = .false.

         ! copy the solution vector back to elements zST
         if (plus) then
            call CopyZSTplus_fromLongVector( grid, nsize, p_mod, q_mod, newton%xD(1:nsize) )
         else
            ! optional parameter .true. means "update", zST = zST + newton%xD
            ! otherwise zST = newton%xD
            call CopyZST_fromLongVector( grid, nsize, newton%xD(1:nsize), .true. )
         end if

         ! NOTE: the dual residual is not recomputed here; the code doing so
         ! (computeDualResidualPlus on the biggest p_mod/q_mod) was disabled.

      endif

   end subroutine setDampingInNewton



   !> Anderson acceleration of the Newton update.
   !>
   !> The history stored in newton%andersonAcc is shifted by one position, the
   !> newest differences of g (row AA_w) and G (row AA_F) are inserted at
   !> position 1, and, when at least one history vector is available, a small
   !> linear system with the matrix of scalar products of the g-differences is
   !> solved.  The resulting coefficients are used to correct the long solution
   !> vector, which is copied back to wST; the terms are then re-assembled and
   !> newton%b1 and newton%res0 are refreshed.
   !> Not tested, copied from the old subroutine.
   !>
   !> \param newton          Newton data incl. the andersonAcc storage
   !> \param grid            mesh holding the element-wise solution wST
   !> \param deg_plus        passed on to ComputeST_Terms
   !> \param eta             passed on to FillVectorST
   !> \param loc_implicitly  not referenced in this routine
   subroutine setAndersonInNewton( newton, grid, deg_plus, eta, loc_implicitly)
      class( Newton_t ), intent ( inout ) :: newton
      class( mesh ), intent(inout) :: grid
      logical, intent(in) :: deg_plus
      real, intent(in) :: eta
      logical, intent(in) :: loc_implicitly
      integer :: nHist, nDof, iShift, iVec
      integer :: rowF, rowW, nRows, maxHist
      real, dimension(:,:), allocatable :: gramTmp   ! temporary copy of the matrix
      real, dimension(:,:), pointer :: gram          ! scalar products of Delta g
      real, dimension(:), pointer :: coef            ! RHS, then solution of the small system

      nDof = state%nsize

      rowF    = newton%andersonAcc%AA_F
      rowW    = newton%andersonAcc%AA_w
      nRows   = newton%andersonAcc%AA_m
      maxHist = newton%andersonAcc%AA_max

      ! number of history vectors which can be used in this iteration
      nHist = min( newton%iter - 1, maxHist )

      allocate( gramTmp(nHist, nHist) )

      coef => newton%andersonAcc%alpha(1:nHist)
      gram => newton%andersonAcc%dd(1:nHist, 1:nHist)

      ! move the stored differences one position towards the older end
      do iShift = nHist, 2, -1
         newton%andersonAcc%AA_x(1:nRows, iShift, 1:nDof) = &
              newton%andersonAcc%AA_x(1:nRows, iShift - 1, 1:nDof)
      enddo

      ! the matrix is shifted accordingly (via the temporary array)
      gramTmp(2:nHist, 2:nHist) = gram(1:nHist-1, 1:nHist-1)
      gram(2:nHist, 2:nHist) = gramTmp(2:nHist, 2:nHist)

      ! AA_update serves as a work array: first it holds g_k, taken as the
      ! damped Newton increment  d^{k+1} = w^{k+1} - w^{k}
      newton%andersonAcc%AA_update(1:nDof) = newton%x(1:nDof) * newton%lambda

      ! new difference Delta g_1; slot 0 of row AA_w still holds the previous g
      newton%andersonAcc%AA_x(rowW, 1, 1:nDof) = &
           newton%andersonAcc%AA_update(1:nDof) - newton%andersonAcc%AA_x(rowW, 0, 1:nDof)

      ! slot 0 now remembers the current g
      newton%andersonAcc%AA_x(rowW, 0, 1:nDof) = newton%andersonAcc%AA_update(1:nDof)

      ! the work array is overwritten by the current solution, G(u^k)
      call CopyWST_toLongVector( grid, nDof, newton%andersonAcc%AA_update(1:nDof) )

      ! new difference Delta G_1; slot 0 of row AA_F still holds the previous G
      newton%andersonAcc%AA_x(rowF, 1, 1:nDof) = &
           newton%andersonAcc%AA_update(1:nDof) - newton%andersonAcc%AA_x(rowF, 0, 1:nDof)

      ! slot 0 now remembers the current G
      newton%andersonAcc%AA_x(rowF, 0, 1:nDof) = newton%andersonAcc%AA_update(1:nDof)

      ! Anderson update, nothing is done without any history vector
      if (nHist /= 0) then

         ! NOTE(review): the scalar products run over the whole last dimension
         ! of AA_x, not only 1:nsize - confirm that both extents agree
         do iVec = 1, nHist
            ! right-hand side
            coef(iVec) = -dot_product(newton%andersonAcc%AA_x(rowW, 0, :),  &
                 newton%andersonAcc%AA_x(rowW, iVec, : ))

            ! first row and column of the symmetric matrix
            gram(1, iVec) = dot_product(newton%andersonAcc%AA_x(rowW, 1, :), &
                 newton%andersonAcc%AA_x(rowW, iVec, : )  )
            gram(iVec, 1) = gram(1, iVec)
         enddo

         ! the small problem is solved on a copy of the matrix, coef is passed
         ! as the right-hand side and used afterwards as the solution
         gramTmp(1:nHist, 1:nHist) = gram(1:nHist, 1:nHist)
         call SolveLocalMatrixProblem(nHist, gramTmp(1:nHist, 1:nHist), 1, coef(1:nHist) )

         ! correction of G(u^k), which is still stored in AA_update
         do iVec = 1, nHist
            newton%andersonAcc%AA_update(:) = newton%andersonAcc%AA_update(:) &
                 + coef(iVec) * newton%andersonAcc%AA_x(rowF, iVec, :)
         enddo

         ! the accelerated solution is written to wST (routine from solution_mod)
         call CopyWST_fromLongVector( grid, nDof, newton%andersonAcc%AA_update(:) )

         ! re-assemble the terms for the new solution
         call ComputeST_Terms( deg_plus )

         ! put the rhs into newton%b1 and evaluate the actual residuum
         call FillVectorST( newton%b1(1:nDof), eta )

         newton%res0 = VectorPrecondNorm( newton%b1 )

      endif

      deallocate(gramTmp)

   end subroutine setAndersonInNewton



   !> Seek a damping parameter lambda for the current Newton update, measuring
   !> the progress by the nonlinear DWR estimate instead of the residual norm.
   !>
   !> lambda starts at 1.  In every iteration the increment newton%x is applied
   !> to elem%wST (only the change lambda - lambda_old is added), then
   !> computeNonlinDWRestimates is called and DWR%estimNL is stored in
   !> newton%res0.  If theta = res0/res >= 1 and res0 > 1E-8, lambda is halved
   !> and the search goes on; otherwise newton%lambda1 is set to
   !> min(1, 1.5*lambda) and the search ends.  The search also ends once lambda
   !> drops below 1E-2; in that case the last halving is not applied to wST
   !> anymore.
   !> This should help the convergence of Newton when the approximation is
   !> still rough.
   !>
   !> NOTE(review): in contrast to setDampingInNewton, new_w_was_computed() is
   !> not called after the update of wST - confirm this is intended.
   !>
   !> \param newton          Newton data; lambda, lambda_old, lambda1, res,
   !>                        res0, theta and newton_count are modified
   !> \param grid            mesh whose elements carry wST and vec(res_vec,:)
   !> \param DWR             DWR data, DWR%estimNL is read after each estimate
   !> \param deg_plus        not referenced by the executable code
   !> \param eta             not used
   !> \param loc_implicitly  controls the extra Newton_output on unit 54
   !> \param res_max_val     threshold (times 0.5) for the diagnostic dump of
   !>                        elements on unit 53 when the linear solver failed
   !> \param max_l           max number of damping iterations
   !> \param l               number of damping iterations performed
   subroutine setDampingInNewton_aDWR( newton, grid, DWR, deg_plus, eta, &
                                      loc_implicitly, res_max_val, max_l, l)
      class( Newton_t ), intent ( inout ) :: newton
      class( mesh ), intent(inout) :: grid
      class( DWR_t ), intent(inout) :: DWR
      logical, intent(in) :: deg_plus
      real, intent(in) :: eta   ! not used
      logical, intent(in) :: loc_implicitly
      real, intent(in) :: res_max_val
      integer, intent(in) :: max_l ! max number of damping iterations
      integer, intent(out) :: l ! number of damping iterations
      class(element), pointer :: elem
      integer :: kk, elemDof, i, k, j
      real :: elemResNorm

      newton%lambda = 1.0   ! initialization of lambda (= damping factor)

      ! may not be needed
      ! compute the estimate for the old wST
      call computeNonlinDWRestimates( DWR, grid )
      newton%res = DWR%estimNL

      damping: do l = 1, max_l   ! iterations, seeking the optimal damping factor

         ! update of the solution
         kk = 0
         do i = 1, grid%nelem

            elem => grid%elem(i)

            ! the element block has to start right after the previous one
            if (elem%ncv /= kk + 1) then
               print*, 'Problem while copying update to wST (compute.f90)'
               stop
            endif

            elemDof = elem%dof * ndim * elem%Tdof
            elem%vec( res_vec, 1:elemDof) =  newton%rr(kk+1:kk+ elemDof)

            ! only the change of lambda w.r.t. the previous trial is applied
            do k = 1, elem%Tdof
               do j = 1, ndim
                  elem%wST(j,1:elem%dof,k) = elem%wST(j,1:elem%dof,k) &
                       + (newton%lambda - newton%lambda_old) &
                       * newton%x(kk + 1 : kk + elem%dof)
                  kk = kk + elem%dof
               enddo !j
            enddo !k

            ! diagnostic output of elements with a large residual
            if(state%linSolver%lin_solver_not_conv > 0) then
               elemResNorm = sqrt( dot_product(elem%vec( res_vec, 1:elemDof), &
                    elem%vec( res_vec, 1:elemDof)))
               if(elemResNorm > 0.5 * res_max_val ) then
                  write(53,*) elem%xc(:), elem%i, elemResNorm, &
                       res_max_val, elem%wST(1:ndim,1:elem%dof,:)
               endif
            endif

         enddo !i

         if(state%linSolver%lin_solver_not_conv > 0 ) close(53)
         newton%lambda_old = newton%lambda

         ! the estimate for the updated wST plays the role of the new residual
         call computeNonlinDWRestimates( DWR, grid )
         newton%res0 = DWR%estimNL

         ! ratio of the new and the old estimate, guarded against division by zero
         newton%theta  =  newton%res0  / max(1E-15, newton%res)

         if(newton%iter > 1) call Newton_output(grid, 55, l, loc_implicitly )
         if(loc_implicitly) call Newton_output( grid, 54, l, loc_implicitly )

         newton%newton_count = newton%newton_count + 1

         if(newton%theta >= 1 .and. newton%res0 > 1E-8 ) then
            ! residuum is not decreasing, reduce lambda
            newton%lambda = 0.5* newton%lambda
         else
            ! success, a slightly larger lambda is stored in lambda1
            newton%lambda1 = min(1., 1.5*newton%lambda )
            exit damping
         endif

         ! too small lambda, NOT CONVERGE
         if(newton%lambda < 1E-2) exit damping

      enddo damping

   end subroutine setDampingInNewton_aDWR


   !> Perform one step of the Newton method, i.e.
   !> solve the linear algebraic system given by C(u) d^k = -F(u).
   !> The system is solved by GMRES; its parameters nloops and restart are set here,
   !> but other methods may be implemented in SolveBlockLinearSTDGMProblem later.
   !> The tolerance on the linear algebraic error is saved in state%linSolver%tol.
   !> When Newton%non_alg_stop == 'aDWR', the DWR_t structure is needed to compute the estimates of the algebraic error.
   subroutine performOneNewtonStdgmStep( newton, grid, imp, iter, &
      deg_plus, newtonDone, res_max_val, loc_implicitly, DWR )
      class( NonlinearSol_t ), intent ( inout ) :: newton ! this should the Newton type
      class( mesh ), intent(inout) :: grid
      integer, intent(in) :: imp
      integer, intent(in) :: iter
      logical, intent(in) :: deg_plus
      logical, intent(out) :: newtonDone
      real, intent(out) :: res_max_val
      logical, intent(out) :: loc_implicitly
      type( DWR_t), intent(inout), optional :: DWR

      logical :: update, BiCG
      class(element), pointer :: elem
      integer :: max_l, l, kk, elemDof, i, k, j, p_mod, q_mod
      integer :: restart, nloops, nsize, bigNsize, ifile
      logical :: vector_update
      real, allocatable, dimension(:) :: vecTest, outTest, outTest2, vecTestBig
      real, allocatable, dimension(:) :: newtonB, NewtonX
      real, allocatable, dimension(:) :: x0, y0, b0, c0
      real :: xi_initP, xi_initD, J1, J2, J1a, J2a, xi_P, xi_D
      !real :: t1, t2, time_prepare, tt, tt1

      ! the use of BiCG method for DWR
      BiCG = .false.
      if(present(DWR) ) then
         if( DWR%BI) BiCG = .true.
      endif

      !print*,"performOneNewtonStdgmStep called !!!, bicg = ", BiCG, "deg_plus=", deg_plus

      xi_initP = 0.
      xi_initD = 0.

      newtonDone = .false.

      call state%cpuTime%startPrepareTime()
      if(imp == 0)  newton%implicitly = .true.

      if ( state%nlSolver%non_alg_stop == 'aDWR') then
         write(debug,*) 'There may be problems with implicitly!'
         !print*, 'Probably in the matrix there are values with deg_plus!'
         write(debug,*) 'Set implicitly = .true.'
         newton%implicitly = .true.
      endif

      loc_implicitly = newton%implicitly
      update = .false.

      !print*
      !print*,'newton%implicitlyDE#G=',newton%implicitly, state%nlSolver%iter

      if( newton%implicitly ) then
          update = .true.
          !deg_plus = .true.

          if (state%space%estim_space == "DWR" .and. state%model%linear) then ! VD Newton
             !print*, "linProblem: ComputeST_Terms in performOneNewtonStdgmStep called with impl true, deg_plus = true"
             call ComputeST_Terms( .true. )
          else
             !print*, "nonlinProblem: ComputeST_Terms in performOneNewtonStdgmStep called with impl true, deg_plus = false"
             call ComputeST_Terms( .false. )

          end if
          !call ComputeST_Terms( .true. )

          !print*, 'PM test ComputeSTDGM_Terms commented with impl true! ',' deg_plus=', deg_plus
          !call ComputeSTDGM_Terms( .false. )
          !!call ComputeSTDGM_Terms( deg_plus )

          !call WriteMatrixST_Blocks(0.)

          newton%newton_count = 0
          newton%updates = newton%updates + 1
          newton%implicitly = .false.

          !print*, 'ComputeST_Terms in performOneSTDGMStep with impl = false degPlus= ', deg_plus
          !write(31,'(a10, i5, 200es12.4)') 'RHS ..:', grid%elem(1)%i,  grid%elem(1)%rhsST

          call ComputeST_Terms( deg_plus ) ! fill elem%rhST

          newton%implicitly = .true.
          vector_update = .true.
          newton%implicitly = .false.

      ! computing with the matrices from previous time step ?
      else

          ! for iter > 1, array b(:) = F(x^k)
          if(iter == 1) then
             !print*, 'ComputeST_Terms in performOneSTDGMStep, degPlus=', deg_plus, &
             !  "impl = ", newton%implicitly
             call ComputeST_Terms(deg_plus )

             vector_update = .true.
          else
             vector_update = .false.
             newton%b(1:state%nsize) = newton%b1(1:state%nsize)
             newton%res = newton%res0
          endif
      endif ! end of if(state%nlSolver%implicitly)


       eta = 1./state%time%tau(1)

       if(vector_update) then  !filling elem%rhsST into global rhs
          !state%nlSolver%pseudo_time%PTS_rho = 1.
          !print*,' PM test (X)'
          call FillVectorST(newton%b(1:state%nsize), eta ) ! value of eta is not used, only its sign
          newton%res = VectorPrecondNorm(newton%b)
          !print*,'EEEEEEEEEEEEEE????',norm2(newton%b)  ! BICG, residuum can increase after one NEWTON STEP,   WHY???????????????????
       endif

       if(iter == 1) newton%res_ref = newton%res

       ! first exit based on the absolute value of the residuum vector || F(w) ||
       !if(iter > 1 .and. newton%res < newton%tol &  DOES NOT WORKS !!
       !if(iter > 1 .and. newton%res/state%nsize < newton%tol &
       if(iter > 1 ) then
          if( newton%res < newton%tol .and. state%nlSolver%non_alg_stop /= 'aDWR' ) then
             print*,' #E#E#E#  NEWTON DONE STOP:' , newton%res/state%nsize , newton%tol
             newtonDone = .true.
          endif
       endif

       newton%iter  = newton%iter + 1
       newton%Aiter = newton%Aiter  + 1
       newton%TAiter = newton%TAiter  + 1

       newton%lambda_old = 0.
       newton%x(1:state%nsize) = 0.

       if (.not. state%linSolver%tol_fixed) then
          if(iter == 1) then
             state%linSolver%tol = 0.25
          else
             state%linSolver%tol = min(0.25, abs(newton%res - state%linSolver%residuum) / newton%res1)
             state%linSolver%tol = max(state%linSolver%tol, 1E-6)
          endif
       endif

       call state%cpuTime%addPrepareTime()

       !call WriteMatrixST_Blocks(eta)

       !call test_bMVprodST()
       !call Write_rhsST()
       !stop "u834398dj3"

       ! set number of iterations and number of nloops of lin alg. solver
       if (newton%non_alg_stop == 'aDWR') then
          ! set number of iter after which restart is done
          if ( present(DWR) ) then
             restart = DWR%aDWR%restart_primal
             nloops = 1
             DWR%aDWR%iter = DWR%aDWR%iter + 1
             
             ! set the right tolerance
             state%linSolver%tol = DWR%aDWR%linTol * DWR%aDWR%C_Safe
             !print*, 'nlTol set to : ' , DWR%aDWR%nlTol
             !print*, 'linTol set to : ' , state%linSolver%tol
             !if ( DWR%aDWR%nlTol < DWR%aDWR%linTol ) &
             !   stop 'nlTol is under linTol in DWR nonlinear solve'
             
             ! SOLVE THE SYSTEM
             !print*, 'SOLVE PRIMAL problem !'
             call PrimalLinSolver( DWR, grid, newton, state%linSolver%tol )
             
          else
             stop 'performOneNewtonStdgmStep must be called with DWR, if Newton%non_alg_stop == aDWR '
          endif
       else
         ! parameters for the linear (GMRES) solver
         restart = 50 !30
         nloops =  10 !5 ! 10  ! HERE TO CHANGE 1
         !if(state%time%maxiter == 43) nloops = 5 ! 2 ! SIMULATION of  aDWR stopping criterion

         !if( state%modelName == 'porous') nloops = 1


         if(state%model%varying_time_term) eta = 0. ! time deriv terms already included
         if(state%nlSolver%pseudoT) eta =  state%nlSolver%pseudo_time%PTS_rho

         !print*,'dwr iter state%space%estim_space == ', state%space%estim_space, iter
         ! preparation of the right hand-side for dual problem

         ! BiCG technique according [Strakos, Tichy, SISC 2011]
         !   A x = b_0, initial approximation x_0   <==>    A x~ = b_0 -   A x_0 = b
         ! A^T y = c_0, initial approximation y_0   <==>  A^T y~ = c_0 - A^T y_0 = c
         !  x~ = newton%x      y~ = newton%xD
         !  b  = newton%b      c  = newton%bD
         if(BiCG) then
            !XYZ  -removing of lines needeg for BiCG - JSC 2020 paper
            !XYZ  - not necessary for nonlinear problems

            if(iter == 1 .and. state%time%iter_loc == 1) then
               !print*,'DWR cleaning and preparing '
               call DWR%clean()
               !!call PrepareDualProblem( DWR )  - NOT NECESSARY
               !print*,'call DWR %J%findSupp( grid )'
               call DWR%J%findSupp( grid )
            endif

            p_mod = state%getP_mod()
            q_mod = state%getQ_mod()

            ! setting of the RHS for the dual problem
            ! RHS must be computed after ComputeSTDGM_Terms( )
            call DWR%setRHS( grid )

            bigNsize = state%getBigNSize()

            !print*,'call PrepareDualProblem_RHS( DWR, grid )'
            ! assembles dwr%rhs  -> newton%bD and
            !           elem%zST -> newton%xD
            ! call PrepareDualProblem_RHS( DWR, grid )
            call PrepareDualProblem_RHS(DWR, grid, bigNsize, newton%xD, newton%bD)

            if(p_mod > 0) then
               print*,'state%getP_mod() > 0 = ', state%getP_mod()
               print*,'this subroutine has to be re-written, also the elem%wST GESEJ'
               stop
            endif

            allocate(x0(1:bigNsize), y0(1:bigNsize))
            !XYZ  allocate(b0(1:bigNsize), c0(1:bigNsize))

            ! update of the RHS: A^T  z_0 (z_0 = zST)   DWR%rr is temporary array now
            ! call bMVprodBIG_Dual( DWR%rr(1: bigNsize), DWR%x(1:bigNsize) , bigNsize)
            call bMVprodBIG_Dual( newton%rrD(1: bigNsize), newton%xD(1:bigNsize), bigNsize)

            ! new RHS for the dual problem b = b - A^T z_0  ( a la Newton update)
            ! is set several lines below

            ! assemple elem%w_ST -> newton%x
            call FillVectorST_from_wST( newton%x(1: bigNsize ) )

            ! A x_0   !  NOT NECESSARY for REAL computation
            !XXXxxx
            !XYZ call bMVprodBIG( b0(1: bigNsize), newton%x(1:bigNsize) , bigNsize)
            ! b0 = A x_0 + b   <==>  b = b0 - A x_0
            !XXXxxx
            !XYZ b0(1:bigNsize) = b0(1:bigNsize) + newton%b(1:bigNsize)
            !XYZ c0(1:bigNsize) = newton%bD(1:bigNsize)
            !XXX

            ! init the value of the functional (see [Strakos, Tichy, SISC 2011]
            J1 = dot_product (newton%x(1: bigNsize), newton%bD(1: bigNsize) )
            J2 = dot_product (newton%xD(1: bigNsize), newton%b(1: bigNsize) )
            xi_initP = J1 + J2
            xi_P = J1 + J2
            !write(*, '(a30, 3es22.14)') &
            !     'INIT J(u) 1 (xi_i, c*x, y*b~ =', xi_initP,  J1, J2

            ! new RHS for the dual problem c = c - A^T z_0  ( a la Newton update)
            ! MUST be COMPATIBLE with the update of the dual solution ! VD DUAL BICG
            newton%bD(1:bigNsize) = newton%bD(1:bigNsize) - newton%rrD(1:bigNsize)

            !XXXxxx
            !XYZ  J1 = dot_product (newton%x(1: bigNsize), newton%bD(1: bigNsize) )
            !XYZ  J2 = dot_product( newton%xD(1: bigNsize), b0(1: bigNsize) )
            !XYZ xi_initD = J1 + J2
            !XYZ xi_D = J1 + J2

            !write(*, '(a30, 3es22.14)') &
            !    'INIT J(u) 2 (xi_i, c~*x, y*b =', J1 + J2, J1, J2
            !XXXxxx


            !XXX newton%rr(1: bigNsize ) = newton%x(1: bigNsize )
            !XXX NEEDED FOR new estimates
            x0(1: bigNsize ) = newton%x(1: bigNsize )  ! used in estP,estD in bicg.f90
            y0(1: bigNsize ) = newton%xD(1: bigNsize )

            ! initialization of updates
            newton%x(1: bigNsize ) = 0.
            newton%xD(1:bigNsize) = 0.

         endif ! if(BiCG)



         ! SOLVE THE SYSTEM
         call state%cpuTime%startSolveTime()
         ! for Ben Southworth
         ! writing of the sparse block matrix
         !call WriteBlockLinearSTDGMsystem(state%nsize, eta, newton%b, newton%x, &
         !     newton%iter, state%time%iter)
         !call WriteMatrixA_ST(eta)
         !print*,'eta = ', eta, state%time%tau(1)
         !call WriteMblock_Screene(grid%elem(1)%blockST(0) )

         if(state%space%adapt%adapt_level == 0 .and. state%nlSolver%Aiter <= 1) &
            print*, "This wont work for DWR plus"
         call state%setP_mod( 0 )
         call state%setQ_mod( 0 )
         bigNsize = state%getBigNSize()


         !  simultaneous solution of the primal and dual problems  using of BiCG method
         !if (state%space%estim_space == 'DWR' .and.  DWR%BI) then
         if (BiCG ) then
            restart = 10   ! DEFINES ALSO  update_iter = \NU
            !if(iter > 1) restart = 3
            !nloops = 100 !5 ! 10
            nloops = 100 !1000  !40 ! 50 !5 ! 10
            !if(state%time%maxiter == 43) nloops = 10  ! SIMULATION of  aDWR stopping criterion

            call SolveBlockLinearBigPrimalDualProblem(bigNsize, &
                 eta, newton%b,  newton%bD, newton%x, newton%xD, &
                 state%linSolver%residuum, state%linSolver%iter, &
                 state%linSolver%lin_solver_not_conv, restart, nloops, xi_initP, xi_initD, &
                 x0(1:bigNsize), y0(1:bigNsize)) ! , b0(1:bigNsize), c0(1:bigNsize))-optional

            !print*,'after  sx:',norm2( newton%x), norm2( DWR%x)
            !k = 6
            !do i=0,bigNsize, k
            !!    !write(*,'(a10, 2i5, 300es14.6)') 'init solD:', &
            !   write(30+state%time%iter,'(a10, 2i5, 300es24.16)') 'init solD:', &
            !        i+1, k, newton%x(i+1: min(bigNsize, i+k) )
            !enddo
            !k = 6
            !do i=0,bigNsize, k
            !!    !write(*,'(a10, 2i5, 300es14.6)') 'init solD:', &
            !   write(40+state%time%iter,'(a10, 2i5, 300es24.16)') 'init solD:', &
            !        i+1, k, x0(i+1: min(bigNsize, i+k) )
            !enddo

            ! writing of the matrices and vectors
            !if(mod(state%space%adapt%adapt_level, 4) == 0) &
            !     call WriteBiCG_Matlab_All(bigNsize, state%space%adapt%adapt_level, &
            !     newton%b,  DWR%b, newton%x,  DWR%x )

            !!call WriteBIGmatrixMatlab_Naive(bigNsize)
            !!call WriteBIGmatrixMatlab_Simple(bigNsize)
            !call WriteBIGmatrixMatlab(bigNsize)  ! faster but not preconditioner
            !call WriteBIGVectorMatlab(bigNsize, newton%b,  DWR%b, "rhs_b", "rhs_c")
            !call WriteBIGVectorMatlab(bigNsize, newton%x,  DWR%x, "sol_b", "sol_c")


            ! output quantity resulting from BiCG solver
            DWR%J%Ju_BiCG = xi_initP  ! J'(w)u^k
            DWR%J%Ju_BiEE = xi_initD  ! J'(w)(u^{k+\nu} - u^k)
!            print*, 'RHS BiCG: ', size(newton%b) , norm2(newton%b)
!
!             print*, 'Sol bicg:', size(newton%x) , norm2(newton%x), &
!               size(newton%xD) , norm2(newton%xD)
!             print*, "residuum, not_conv, restart, nloops:" , state%linSolver%residuum, &
!               state%linSolver%lin_solver_not_conv, restart, nloops

            deallocate(x0, y0)
            !XYZ deallocate(b0, c0)

         else


            call SolveBlockLinearBigProblem(bigNsize, &
                 eta, newton%b, newton%x, &
                 state%linSolver%residuum, & ! state%linSolver%iter, &
                 state%linSolver%lin_solver_not_conv, restart, nloops)

         endif

         ! ! COMPARISON BiCG x GMRES  -- START
         ! if (BiCG ) then
         !    ifile = 200
         ! else
         !    ifile = 100
         ! endif


         ! allocate(b0(1:bigNsize), x0(1:bigNsize))

         ! do i=1, -bigNsize
         !    x0 = 0.
         !    x0(i) = 1.
         !    call bMVprodBIG( b0(1: bigNsize), x0(1:bigNsize) , bigNsize)
         !    do j=1,bigNsize
         !       if(b0(j) /= 0.) write(ifile+ 5,*) i,j,b0(j)
         !    enddo

         ! enddo

         ! call bMVprodBIG( b0(1: bigNsize), newton%x(1:bigNsize) , bigNsize)
         ! b0(1: bigNsize) = b0(1: bigNsize) - newton%b(1:bigNsize)

         ! print*, "residuum b0:", norm2(b0)

         ! write(ifile+3,'(a16, 300es14.6)') 'REZ b0(:) = ', norm2(b0)

         ! k = 4
         ! do i=0,bigNsize-1, k
         !    write(ifile+1,'(a6, i5, 300es14.6)') 'rhss:',i+1, newton%b(i+1: min(bigNsize, i+k) )
         !    write(ifile+2,'(a6, i5, 300es14.6)') 'sols:',i+1, newton%x(i+1: min(bigNsize, i+k) )
         !    write(ifile+3,'(a6, i5, 300es14.6)') 'rezs:',i+1,       b0(i+1: min(bigNsize, i+k) )
         ! enddo

         ! deallocate(b0)

         ! ! COMPARISON BiCG x GMRES  -- END

         call bMVprodBIG( newton%rr, newton%x, state%nsize)    ! rr = Ax
         !print*, 'norm of the residual1 = ', norm2(newton%rr)

         newton%rr(:) = newton%rr(:) - newton%b(:)
         !print*, 'norm of the residual2 = ', norm2(newton%rr)

         call state%cpuTime%addSolveTime()

       endif

       call state%cpuTime%startPrepareTime()

       ! post-processing for the case when the linear solver did not converge
       if(state%linSolver%lin_solver_not_conv > 0) then
          open(53, file='GMRES_failed', status='UNKNOWN', position='append')
          res_max_val = 0.
          do i = 1, grid%nelem
             elem => grid%elem(i)

             kk = elem%ncv - 1
             elemDof = elem%dof * ndim * elem%Tdof
             elem%vec( res_vec, 1:elemDof) =  newton%rr(kk+1:kk+ elemDof)
             res_max_val = max( res_max_val, &
                  sqrt( dot_product(elem%vec( res_vec, 1:elemDof), elem%vec( res_vec, 1:elemDof) )))
          enddo
          ! FR added
          close(53)
       endif

       call state%cpuTime%addPrepareTime()

   end subroutine performOneNewtonStdgmStep

   !> write the outputs of the Newton method into the file 'ifile' -- 'criter'
   subroutine Newton_output(grid, ifile, l, loc_implicitly )
    class( mesh ), intent(in) :: grid
    integer, intent(in) :: ifile
    integer, intent(in) :: l
    logical, intent(in) :: loc_implicitly
    real :: t2

    associate ( Newton => state%nlSolver )

      call cpu_time(t2)

      if(state%time%iter == 0 .and. Newton%iter == 1) &
           write(ifile,'(a1,3i3,14i13, i10,2i3,i10,i9,i11, i12, 4i8, 4i12, i8)') '#',&
           1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,&
           21,22,23,24,25,26,27,28,29,30

      write(ifile,'(3i4,14es13.5, i10,2l3,F10.1,i9,i11, es12.4, 4i8, 4es12.4, i8)') &
           state%time%iter,Newton%iter,l, &                             ! 1..3
           state%time%iter + 1.*(Newton%iter-1)/(Newton%max_iter+2), &      ! 4
           state%estim(resA_ST,1), &                                       ! 5
           Newton%res/state%nsize,  Newton%res0/state%nsize, &             ! 6..7
           Newton%res,  Newton%res0, &                                     ! 8..9
           state%linSolver%tol,  Newton%lambda, Newton%theta, &            !10 .. 12
           state%L_estim(resA:resST), state%time%tau(1), &                 !13 .. 17
           state%linSolver%iter, loc_implicitly, &                         !18 .. 19
           state%linSolver%precond_update, t2- state%start_time, &         !20 .. 21
           Newton%Aiter, state%linSolver%iter_tot, &                       !22 .. 23
           state%time%ttime+ state%time%tau(1),  &   !24
           state%num_call_C, state%num_call_F, grid%nelem, state%nsize, &  !25 .. 28
           state%L_estim(resA) / max(1E-15, state%L_estim(resS) ),   &     ! 29
           state%L_estim(resA) / max(1E-15, state%L_estim(resT) ),   &     ! 30
           state%L_estim(resT) / max(1E-15, state%L_estim(resS) ), &       ! 31
           state%linSolver%residuum, state%linSolver%iter                  ! 32..33
           end associate

   end subroutine Newton_output

   !> Estimate the nonlinear error in Newton for aRES
   !> it should be called only when Newton%non_alg_stop == 'aRES'
   subroutine EstimateNewtonARes( Newton, time_prepare, time_estim, done, estS)
      class( Newton_t ), intent ( in ) :: Newton
      real, intent(inout) :: time_prepare
      real, intent(inout) :: time_estim
      logical, intent(inout) :: done
      real, intent(inout) :: estS
      class(element), pointer :: elem
      integer :: i
      real :: t1, t2, estim

      call cpu_time(t2)
      time_prepare = time_prepare + t2 - t1

      !print*, 'PerformOneSTDGMstep calling RezidErrorEstimate(true)'
      !call RezidErrorEstimates( .true., .false. )  !onlyAS =.true.  -- almost the same
      call RezidErrorEstimates( .false., .false. )

      call cpu_time(t1)
      time_estim = time_estim + t1 - t2
      !write(*,'(a6, 3es12.4)') 'timesD',time_prepare, time_solve , time_estim

      ! if(Newton%iter == 1) &
      !write(*,'(a4,a4,5a12,a7,2a12)') '  ', 'iter', '|F(x^k)| ','|F(x^k+1)|', &
      !      'eta_A', 'eta_S', &
      !      'rel. err. ', '  LA it',&
      !      'LA resid', '   CPU(s) '

      ! write(*,'(a4,2i4,5es12.4,a2,i5,2es12.4,l2)') 'NEW:', Newton%iter, l-1, &
      !       Newton%res,Newton%res0, &
      !       state%L_estim(resA), state%L_estim(resST), &
      !       state%L_estim(resA)/ state%L_estim(resST),' |', &
      !       state%linSolver%iter, state%linSolver%residuum, t1 -  state%start_time,&
      !       loc_implicitly

      if(.not. state%time_dependent) then
         if(state%estim(resA_ST,1) < Newton%tol2 ) then
            !if(state%estim(resA_ST_loc,1) < Newton%tol2 ) then  ! HERE TO CHANGE
            done = .true.

            !goto 200  ! Newton method has achieved the prescribed tolerance
         endif

      else !  state%time_dependent      ! HERE TO CHANGE

         !write(*,'(a8, 40es12.4)') &
         !     'STG:',state%L_estim(resA),state%L_estim(resS),state%L_estim(resT),&
         !     state%estim(resA_ST,1) , Newton%tol2, &
         !      state%L_estim(resA) /  max(1E-15, state%L_estim(resT))

         ! OLD VARIANT
         !if(state%estim(resA_ST,1) < Newton%tol2  ) then
         !.and. state%L_estim(resA) /  max(1E-15, state%L_estim(resT)) < 0.1) then

         ! NEW VARIANT
         estim = state%L_estim(resA) /max(1E-15, minval(state%L_estim(resS:resT)) )
         if( estim < Newton%tol2  ) then
            !if(state%estim(resA_ST_loc,1) < Newton%tol2 ) then
            done = .true.
            if(state%time%iter <=1) print*,'NEW aRES criterion'
            !goto 200  ! Newton method has achieved the prescribed tolerance
         endif
      endif

      estS = state%L_estim(resS)

      ! associate ( Newton => state%nlSolver )
      !   if(Newton%iter == 1) then
      !      open(103, file = 'criter_loc', status='replace')
      !   else
      !      open(103, file = 'criter_loc', status='UNKNOWN', position='append')
      !   endif

      !   do i=1,grid%nelem
      !      elem => grid%elem(i)
      !      if( elem%eta(resA, 1)/ elem%eta(resS, 1) > 1E-3 .and. elem%eta(resA, 1)> 1E-3 ) then
      !           write(103, *) elem%i + 1.*(Newton%iter-1)/(Newton%max_iter+2), &
      !           elem%eta(resA, 1), elem%eta(resS, 1), elem%eta(resT, 1)
      !           write(1000+elem%i, *) elem%i + 1.*(Newton%iter-1)/(Newton%max_iter+2), &
      !           elem%eta(resA, 1), elem%eta(resS, 1), elem%eta(resT, 1), '76hhju73'
      !        endif
      !   enddo

      !   close(103)

      ! end associate


   end subroutine EstimateNewtonARes


   !> Estimate the nonlinear error in Newton for aDWR
   !> AND compute the dual problem, then compute nl estimate again
   !> it should be called only when Newton%non_alg_stop == 'aDWR'
   subroutine EstimateNewtonNLDwr( grid, DWR, iter, newtonDone)
      class( mesh ), intent(inout) :: grid
      type( DWR_t), intent(inout) :: DWR
      integer, intent(in) :: iter
      logical, intent(out) :: newtonDone
      real :: tolerance, tolerance_old, fac_iter, estimNL_old
      integer :: ifile, ifileMD, ifileA
      character(len=20), parameter :: fileName = 'NLDwr_conv'
      character(len=20), parameter :: fileNameA = 'NLDwr_Alev'
      character(len=20), parameter :: fileNameMD = 'NLDwr_conv.md'
      real :: t2 

      ifile = 10
      ifileMD = 11
      ifileA = 12

      fac_iter = state%time%iter + 0.5 &
           + ( (state%nlSolver%iter-1) ) / (state%nlSolver%max_iter*1.1)

      !fac_iter = state%space%adapt%adapt_level + &
      !     ( (state%nlSolver%Aiter-1) ) / (state%nlSolver%max_iter*state%time%maxiter*1.1)

      if(state%time%iter == 0 .and. state%nlSolver%iter == 1) then !fac_iter <=1E-5) then
         open( ifile, file = fileName , status = 'replace', action="write" )
         open( ifileMD, file = fileNameMD , status = 'replace', action="write" )
         open( ifileA, file = fileNameA , status = 'replace', action="write" )
      else
         open( ifile, file = fileName , status = 'UNKNOWN', position="append" )
         open( ifileMD, file = fileNameMD , status = 'UNKNOWN', position="append" )
         open( ifileA, file = fileNameA , status = 'UNKNOWN', position="append" )
      endif

      !if( state%nlSolver%iter == 1) then
      if( state%time%iter_loc == 1 .and.  state%nlSolver%iter == 1) then
         write(ifile, '(x)')
         write(ifileMD, '(x)')
      endif
      
      estimNL_old = DWR%estimNL

      ! set tolerance
      if(state%space%adapt%adapt_level == 0 .and. state%time%iter == 0 ) then
         tolerance = state%nlSolver%tol2  !2
         eta = 0.
      else
         tolerance_old = DWR%nlDWR_tol
         eta = sqrt( state%estim(dwrEtaI_aver,1) )
         !tolerance = min( tolerance_old * 0.5, 2.5E-1 * eta  )

         tolerance = min( 10* tolerance_old, state%nlSolver%tol2 * eta )  !/ 2  )
         
         !tolerance = max( state%linSolver%tol_bicg , &
         !     state%nlSolver%tol2 * state%space%adapt%tol_max  )

         !tolerance = max(  estimNL_old * 1E-2, & 
         !     state%nlSolver%tol2 * state%space%adapt%tol_max  )
      endif
      DWR%nlDWR_tol = tolerance ! 


      ! estimate nlError: a_h(u_h, z_h) = F(U_h)*Z_h (2) '
      call computeNonlinDWRestimates( DWR, grid )
      state%estim( dwrA, 1) = DWR%estimNL**2
      
      call DWR%J%computeJu( grid )
      if(state%nlSolver%iter == 1) write(*,'(x)')
      write(*,'(a20, i3, i4, es14.6, 16es10.2)') 'JU dJU alg tol eta:', &
           state%nlSolver%iter, state%linSolver%iter,&
           DWR%J%Ju, abs(DWR%J%Ju - DWR%J%Ju_BiCG), &
           DWR%estimNL, tolerance, eta

      ! history of linear and nonlinear stopping criteria
      write(ifile, '(2i8, 2es12.4, 3i8,9es14.6, 2es12.4)') &
           state%space%adapt%adapt_level, state%nlSolver%TAiter, (1.*state%getBigNSize())**(1./3), &
           fac_iter, state%nlSolver%iter, state%linSolver%iter,state%linSolver%iter_tot, & ! 4..7
           DWR%J%Ju_BiCG, DWR%J%Ju, abs(DWR%J%Ju - DWR%J%Ju_BiCG) , &          ! 8..10
           abs(DWR%J%Ju - DWR%J%Ju_BiCG) / max(abs(DWR%J%Ju), 1E-15), &        ! 11
           DWR%J%Ju_BiEE, DWR%estimNL, tolerance, &                            ! 12..14
           eta,  sqrt( state%estim(dwrEtaI_primalA,1) ) , &                    ! 15..16
           state%time%tau(1)
           !state%nlSolver%iter, state%nlSolver%Aiter, state%nlSolver%lambda, state%nlSolver%lambda1
      close(ifile)

      if(state%nlSolver%Aiter == 1) then
         write(ifileA, *) state%nlSolver%TAiter-1, state%space%adapt%tol_max*0.9, state%time%iter-1 
         write(ifileA, '(x)')
         write(ifileA, *) state%nlSolver%TAiter, state%space%adapt%tol_max*0.9, state%time%iter 
      endif
      close(ifileA)

      call cpu_time(t2)
      write(ifileMD, 999) state%space%adapt%adapt_level, &
           state%time%iter , state%nlSolver%iter, state%getBigNSize() , &
           state%nlSolver%Aiter, state%linSolver%iter,state%linSolver%iter_tot, &
           t2 - state%start_time
      close(ifileMD)
      
999   format(5(i5 "  ") 2(i8 "  ") f12.1 " s" )
      
      if (DWR%estimNL < tolerance .and. state%nlSolver%TAiter > 1) then
         newtonDone = .true.
         DWR%dualProblem_computed = .true.
      else
         newtonDone = .false.
         ! next line added by Vitek, otherwise can cause troubles, RHS has to be recomputed
         DWR%dualProblem_computed = .true.  
     endif

     !print*, "---EstimateNewtonNLDwr---"
     !print*, 'After ',iter,'th nonlin est! estNLDWR <? TOL' , DWR%estimNL , tolerance

      if (iter >= state%nlSolver%max_iter) then
         newtonDone = .true.
         print*, 'Maximal number of Newton iterations achieved!'
      endif

   end subroutine EstimateNewtonNLDwr

   !> Estimate the nonlinear error in Newton for aDWR
   !> AND compute the dual problem, then compute nl estimate again
   !> it should be called only when Newton%non_alg_stop == 'aDWR'
   subroutine EstimateNewtonADwr( grid, DWR, iter, newtonDone)
      class( mesh ), intent(inout) :: grid
      type( DWR_t), intent(inout) :: DWR
      integer, intent(in) :: iter
      logical, intent(out) :: newtonDone
      real :: etaD, some_criter, t2, t1, dualTol

      ! a) estimate nlError for the 1st time
      ! if we are the 1st mesh then no ZST is computed
      if ( grid%adapt_level > 0 .or. iter > 1 ) then
         call computeNonlinDWRestimates( DWR, grid )

         if ( DWR%estimNL < state%nlSolver%tol ) then
            newtonDone = .true.
         else
            newtonDone = .false.
         endif

         print*, 'After ',iter,'th nonlin est! estNL <? TOL' , DWR%estimNL , state%nlSolver%tol
      else
         newtonDone = .true.
      endif

      if (iter >= state%nlSolver%max_iter) then
         newtonDone = .true.
         print*, 'Maximal number of Newton iters achieved -> compute dual problem!'
      endif

      ! update the nonlinear estimate to see if we really can end Newton iterations !
      if (newtonDone) then
            ! b) Dual problem
            call UpdateDualProblem( DWR )
            ! solve the dual problem with tol = C_A * estimNL
            dualTol = DWR%aDWR%linTol * DWR%aDWR%C_Safe
            call DualLinSolver( DWR, grid, iter, dualTol)

            !DWR%aDWR%iter_lin_dual =  -10  !DWR%linSolver_iter !DWR%aDWR%iter_lin_dual  +
            !call PlotSolDual( DWR%aDWR%iter_lin_dual )
            call PlotSolDual( 0 )
            ! too much memory
            !call PlotSolDual( iter )

            ! c) estimate nlError for the 2nd time
            call computeNonlinDWRestimates( DWR, grid )

            if ( DWR%estimNL < state%nlSolver%tol ) then
               newtonDone = .true.
            else
               newtonDone = .false.
            endif
            !print*, 'After second nonlin est! estNL <? TOL' ,  DWR%estimNL , state%nlSolver%tol
      end if

   end subroutine EstimateNewtonADwr

   !> Estimate the nonlinear error in Newton for aDWR
   !> AND compute the dual problem, then compute nl estimate again
   !> it should be called only when Newton%non_alg_stop == 'aDWR' and
   !> fixedIter - fixed number of lin iters -> decrease graphs !!!
   !> TESTED only for linear problems
   subroutine EstimateNewtonADwrFixed( grid, DWR, iter, newtonDone)
      class( mesh ), intent(inout) :: grid
      type( DWR_t), intent(inout) :: DWR
      integer, intent(in) :: iter
      logical, intent(out) :: newtonDone
      real :: etaD, some_criter, t2, t1, dualTol

      print*, 'EstimateNewtonADwrFixed called'


      ! b) Dual problem
      call UpdateDualProblem( DWR )
            ! solve the dual problem with tol = C_A * estimNL
      dualTol = 1.E-12 ! TOL IS NOT USED DWR%aDWR%linTol * DWR%aDWR%C_Safe
      call DualLinSolver( DWR, grid, iter, dualTol)

            !DWR%aDWR%iter_lin_dual =  -10  !DWR%linSolver_iter !DWR%aDWR%iter_lin_dual  +
            !call PlotSolDual( DWR%aDWR%iter_lin_dual )
            !call PlotSolDual( 0 )
            ! too much memory
            !call PlotSolDual( iter )

            ! c) estimate nlError for the 2nd time
      call computeNonlinDWRestimates( DWR, grid )

      print*, 'TOlerance: ', state%nlSolver%tol
      print*, 'Primal :' ,DWR%estimNL , DWR%estimLP
      print*, 'Dual:' , DWR%estimLD
      print*

      newtonDone = .true.
      print*, 'Always NEWTON DONE !!!'

   end subroutine EstimateNewtonADwrFixed



   !> solve the Newton problem F(w) = 0
   !> with tolerance given in state%nlSolver%tol with respect to one of the
   !> estimating methods: aRES, aDWR, rezL2
   subroutine NewtonSolve( grid, imp, iter, deg_plus, DWR)
      class( mesh ), intent(inout) :: grid
      integer, intent(inout) :: imp ! integer which is used to define implicitly - Should be changed
      integer, intent(inout) :: iter ! in - previous iterations , out - total iterations
      logical, intent(in) :: deg_plus
      type( DWR_t), intent(inout), optional :: DWR
      class(element) , pointer :: elem
      logical :: newtonDone, PTSdone  ! Newton and pseudo-time stepping finished
      real :: time_prepare, time_estim, t1, t2
      integer :: i, nDamping, maxNumberOfDamping
      real :: ratio_Damping
      integer :: iPTS, nPTS, iter_total, nsize
      real :: res_max_val, time, lost
      real :: estS, estS_ini, fac
      logical :: loc_implicitly, bicg_used

      bicg_used = .false.    ! use of the bicg method for the primal and dual solutions

      ! factor for divergence, maximal increase of the initial residuum
      fac = 100. ! can be small, decrease of the time step will follow
      if (state%time%tau_fixed) fac = 1E+08


      ! initialization parameteres for the setting of divergence
      estS = 1.
      estS_ini = 1.

      !nsize = state%nsize
      ! FR_NEWTON
      nsize = state%getBigNSize()

      iter_total = 0   ! sum of all Newton iterations over all pseudo-time steps
      newtonDone = .false.
      PTSdone = .false.

      associate( nlSolver=> state%nlSolver)

        if(state%time%iter==0) nlSolver%lambda1 = 1.0! initialization of lambda (= damping factor)

        !print*,'nlSolver%anderson ===' , nlSolver%anderson
        ! Anderson acceleration - no standard damping is used, only update the solution
        if ( nlSolver%anderson ) then
           ! DRIV: allocate( AndersonAcceleration_t :: nlSolver%andersonAcc )
           nlSolver%andersonAcc%AA_max = 20

           call nlSolver%andersonAcc%init( nsize )
           maxNumberOfDamping = 1
           ratio_Damping = 0.85
           !maxNumberOfDamping = 3
        else
           !maxNumberOfDamping = 5
           !ratio_Damping = 0.5

           !maxNumberOfDamping = 9
           ratio_Damping = 0.75
           maxNumberOfDamping = 15
           !ratio_Damping = 0.5
           if(nlSolver%max_iter == 1) maxNumberOfDamping = 1 ! typically for linear problem
        end if

        ! pseudo-time stepping
        nPTS = 1 ! ONLY one-repetition of the Newton step
        iPTS = 1 ! index of the loop

        ! pseudo-time stepping
        if( nlSolver%pseudoT) then
           nPTS = 20  ! maximal number of the pseudo-time step repetitions
           call nlSolver%pseudo_time%init(nsize )
           nlSolver%pseudo_time%nPTS = nPTS
           nlSolver%pseudo_time%PTS_rho = 10000.
        endif

        ! MAIN CYCLE for the pseudo-time stepping
        do while ( (.not. PTSdone) .and. (iPTS <= nPTS) )
           iter = 1


           if( nlSolver%pseudoT) then
              call CopyWST_toLongVector( grid, nsize, nlSolver%pseudo_time%PTS_x(1, 1:nsize) )

              !write(*,'(a25, 2i5,l3, 30es12.4)') '## pseudo-time stepping:', iPTS, nPTS, PTSdone, &
              !     sqrt(dot_product(nlSolver%pseudo_time%PTS_x(1,:), nlSolver%pseudo_time%PTS_x(1,:))), &
              !     nlSolver%pseudo_time%PTS_x(1, 1:5)
           endif



           ! MAIN NEWTON CYCLE
           do while ( (.not. newtonDone) .and. (iter <= nlSolver%max_iter) )
              ! Three parts:
              !    1) solve the linear system
              !    2) find the damping
              !    3) estimate the error and decide whether to quit - newtonDone = .true.

              !!call  Write_PrimalDualSolution(1000+state%nlSolver%Aiter)
              
              if( nlSolver%pseudoT) then
                 call CopyWST_toLongVector( grid, nsize, nlSolver%pseudo_time%PTS_x(2, 1:nsize) )
                 nlSolver%pseudo_time%PTS_x(2, 1:nsize) = nlSolver%pseudo_time%PTS_x(2, 1:nsize) &
                      - nlSolver%pseudo_time%PTS_x(1, 1:nsize)
              endif


              !print*," 1) SOLVE THE LINEAR SYSTEM"
              if(state%space%estim_space == 'DWR' .and. present(DWR) ) then
                 if(nlSolver%non_alg_stop == 'aDWR' .or. DWR%BI )  then

                    if( DWR%BI ) bicg_used = .true.

                    !if(state%space%estim_space == 'DWR' .and. &
                    !     (nlSolver%non_alg_stop == 'aDWR' .or. DWR%BI ) ) then
                    !if (present(DWR)) then
                    ! inner linear primal problem step
                    ! print*, ' performOneNewtonStdgmStep called '
                    call performOneNewtonStdgmStep( nlSolver, grid, imp,&
                         iter, deg_plus, newtonDone, res_max_val, loc_implicitly, DWR )
                 endif
              elseif(nlSolver%non_alg_stop == 'aDWR') then
                 stop 'performOneNewtonStdgmStep should be called with DWR in its &
                      argument for DWR method'

              else
!                 print*,'call performOneNewtonStdgmStep BB'
                 call performOneNewtonStdgmStep( nlSolver, grid, imp, iter, &
                      deg_plus, newtonDone, res_max_val, loc_implicitly )

                 !print*, "Norm of the res = ", norm2(nlSolver%rr(:))
              endif


              !print*," 2) seeking of optimal damping parameter",nlSolver%anderson
              call state%cpuTime%startSolveTime()

              select type ( nlSolver )
              type is ( Newton_t )
                 ! aDWR  !
                 if ( nlSolver%non_alg_stop == 'aDWR') then
                    ! AND zST ready
                    if (DWR%dualProblem_computed) then
                       call setDampingInNewton_aDWR( nlSolver, grid, DWR, deg_plus, &
                            eta, loc_implicitly, res_max_val, maxNumberOfDamping, nDamping)
                       !   print*, 'temporarily classical damping back!'
                       !   call setDampingInNewton( nlSolver, grid, deg_plus, &
                       !       eta, loc_implicitly, res_max_val, maxNumberOfDamping, nDamping)

                    else
                       print*, 'zST is not ready yet. Classical RES damping is called!'
                       call setDampingInNewton( nlSolver, grid, deg_plus, &
                            eta, loc_implicitly, res_max_val, maxNumberOfDamping, nDamping, ratio_Damping)

                    endif
                 else
                    !print*,'############################################################ 36gd3'
                    !call setDampingInNewtonLocal( nlSolver, grid, deg_plus, &
                    !   eta, loc_implicitly, res_max_val, maxNumberOfDamping, nDamping)
                    if(bicg_used) then
                       !maxNumberOfDamping = 1
                       call setDampingInNewton( nlSolver, grid, deg_plus, &
                            eta, loc_implicitly, res_max_val, maxNumberOfDamping, nDamping,&
                            ratio_Damping, DWR)
                    else
                       call setDampingInNewton( nlSolver, grid, deg_plus, &
                            eta, loc_implicitly, res_max_val, maxNumberOfDamping, nDamping,&
                            ratio_Damping )
                    endif

                 end if

                 ! Anderson Acceleration, 3 variants inside
                 if (nlSolver%anderson ) &
                      call setAndersonInNewton( nlSolver, grid, deg_plus, eta, loc_implicitly)
                 !call setAndersonInNewton_OLD( nlSolver, grid, deg_plus, eta, loc_implicitly)

                 class default
                 stop 'other type of nlSolver'
              end select

              call state%cpuTime%addSolveTime()
              call state%cpuTime%startEstimTime()

              if(nlSolver%newton_count >= 8) nlSolver%implicitly = .true.
              ! !!if(Newton%theta > 0.75) nlSolver%implicitly = .true.
              if(nlSolver%theta > 0.5) nlSolver%implicitly = .true.
              !if(Newton%lambda < 0.9) nlSolver%implicitly = .true. !lambda is small, C(w) update
              if(state%linSolver%lin_solver_not_conv > 0) nlSolver%implicitly = .true.

              !if(nlSolver%implicitly ) write(*,'(a8,i5, a14,i5,a8,es12.4,a8,l2 )')&
              !     'iter = ',state%time%iter, '### Ncount =',nlSolver%newton_count, &
              !     ',  theta = ', nlSolver%theta,',  non conv', state%linSolver%lin_solver_not_conv
              !					state%isol = state%isol + 1
              !					call WriteProgressOutput( 'STE' )
              !call WriteMatrixLU()

              !write(*,'(a8, 2i5, 30es12.4)') 'PTS rez:',iPTS, iter, &
              !     sqrt(dot_product(nlSolver%b1, nlSolver%b1)), nlSolver%res0

              !open(15, file = 'pseudo', status = 'UNKNOWN', position = 'append')
              !if(iter == 1) write(15, '(x)')
              !write(15,'(2i5, 30es12.4)') iPTS, iter, &
              !     1.*( (iPTS-1)* nlSolver%max_iter + iter) / ( nPTS * nlSolver%max_iter + 5), &
              !     sqrt(dot_product(nlSolver%b1, nlSolver%b1))
              !close(15)

              !print*," 3 ) ESTIMATE THE ALGEBRACIC (NONLINEAR) ERROR"
              
              select type ( nlSolver )
                 
              type is ( Newton_t )

                 if (nlSolver%non_alg_stop == 'nlDWR') then
                     if( bicg_used ) then   ! BiCG method has its own stopping criterion

                        !call EstimateNewton_DWR( grid, nlSolver, DWR, newtonDone )
                        !print*,'Newton-BICG stopping criterion', newtonDone
                        call EstimateNewtonNLDwr(grid, DWR, iter, newtonDone)

                        ! VD, new adding

                        !print*,' TRES_DWR '
                        call ComputeST_Terms_impl_false(.true.)
                        !  TRES_DWR end

                        !print*, "call RezidErrorEstimates for time stepping???"
                        call RezidErrorEstimates( .false., .false. ) ! for the setting of time step
                     else
                        stop "nlDWR method tested only with BiCG!"

                     endif

                 ! nonlinear algebraic residuum criterion
                 elseif (nlSolver%non_alg_stop == 'aRES') then
                    call EstimateNewtonARes( nlSolver, time_prepare, time_estim, newtonDone, estS )
                    if(iter == 1) estS_ini = estS
                    if(estS > fac * estS_ini)  newtonDone = .true. ! DIVERGENCE

                 else if (nlSolver%non_alg_stop == 'aDWR') then
                    print*,'aDWR 1'
                    call DWR%J%computeJu(grid)
                    write(debug, *) 'Maybe we should compute Ju_exact - to outputfile'
                    print*,'aDWR 2'
                    call DWR%J%computeJu_exact(grid)
                    print*,'aDWR 3'
                    call DWR%writeNlErrorFile(grid%nelem)
                    print*,'aDWR 4', DWR%aDWR%fixedIter

                    if ( DWR%aDWR%fixedIter ) then ! fixed number of lin iters - decrease graphs
                       call EstimateNewtonADwrFixed( grid, DWR, iter, newtonDone)
                    else
                       call EstimateNewtonADwr( grid, DWR, iter, newtonDone)
                    endif

                 else if (nlSolver%non_alg_stop == 'rezL2') then
                    write(*,*) 'FR nolinear rezL2 reziduum:' , &
                      nlSolver%res0,  '<' , nlSolver%tol !* nsize

                    !if(nlSolver%res0 < nlSolver%tol * nsize )  then
                    if(nlSolver%res0 < nlSolver%tol )  then
                       !write(*,*) 'Reziduum:' , nlSolver%res0,  '<' , nlSolver%tol * nsize
                       !call cpu_time(t2)
                       !time_prepare = time_prepare + t2 - t1
                       ! Newton method has achieved the prescribed tolerance
                       newtonDone = .true.
                    endif
                    !write(*,*) 'Reziduum:' , nlSolver%res0,  '>' , nlSolver%tol * nsize

                 else
                    stop 'Unknown method for nonlinear algebraic error estimation in NewtonSolve!'
                 endif  ! end of other technique
                 class default
                 stop 'other type of nlSolver'
              end select

              call state%cpuTime%addEstimTime( )

              ! Write Newton output
              open(91, file='criter', status='unknown', position='append')
              if(nlSolver%iter == 1) write(91,'(x)')
              if(nlSolver%Aiter == 1) write(91,'(x)')
              !if(state%time%iter == 1) print*, 'TODO: Repair NewtonOutput for other methods than aRES!'
              call Newton_output(grid, 91, nDamping, loc_implicitly )
              close(91)

              nlSolver%res1 = nlSolver%res

              iter = iter + 1

              !!call  Write_PrimalDualSolution(2000+state%nlSolver%Aiter)

           end do ! while
           ! end of newton step, save the solution, write down and  go on



           iter_total = iter_total + iter ! sum of all Newton iterations over all pseudo-time steps
           iPTS = iPTS + 1

           if( nlSolver%pseudoT)  nlSolver%pseudo_time%PTS_rho = nlSolver%pseudo_time%PTS_rho / 10.
        end do  ! while
        ! end of the pseudotime stepping

        iter = iter_total

        ! VD no necessary printing, can be deduced from the number of used
        ! NEWTON ITERATIONS printed on the screen
        !if (( iter > nlSolver%max_iter) .and. (.not. newtonDone) ) then
        !   print*, 'Newton solver ended without reaching the tolerance', &
        !           'achieving max. number of iterations:' , nlSolver%max_iter
        !endif

        if ( nlSolver%anderson ) then
           call nlSolver%andersonAcc%dealloc()
        endif

        nlSolver%converged = newtonDone  ! PCQ (23. 8. 2019)
        !nlSolver%converged = .true.

        fac = 10
        if (state%time%tau_fixed) fac = 1E+08
        if(estS > fac * estS_ini) nlSolver%converged = .false. ! DIVERGENCE

!        print*,'#>MNH', fac, estS , fac * estS_ini,  nlSolver%converged
      end associate ! nlSolver

!      print*,' end subroutine NewtonSolve'
   end subroutine NewtonSolve


   ! Element-wise l2 norm of the vector obtained from the right-hand side.
   !
   ! Steps performed:
   !   1) FillVectorST writes into newton%b1(1:state%nsize) (called with eta),
   !   2) bMViLUprodST is applied to the whole newton%b1, the result is kept
   !      in a temporary work vector (presumably the ILU-preconditioned
   !      right-hand side -- TODO confirm against bMViLUprodST),
   !   3) for every element i the Euclidean norm of the entries
   !      ncv : ncv + dof*Tdof*ndim - 1 of the work vector is stored in estim(i).
   !
   ! The program is stopped unless state%linSolver%name == 'GMRES_ILU' and
   ! state%time%disc_time == 'STDG'.
   !
   ! Arguments:
   !   newton - nonlinear solver data, component b1 is overwritten
   !   grid   - mesh, grid%nelem and grid%elem(i)%{dof,Tdof,ncv} are read
   !   eta    - 1/tau, only handed over to FillVectorST
   !   estim  - estim(i) is set for i = 1,...,grid%nelem
   subroutine localNewtonEstimate( newton, grid, eta, estim )
      class( Newton_t ), intent ( inout ) :: newton
      class( mesh ), intent(in) :: grid
      real, intent(in) :: eta ! 1/tau
      real, dimension(1:grid%nelem), intent(inout)  :: estim
      class(element), pointer :: elemK
      real, dimension(:), allocatable :: work
      integer :: nTotal, nLocal, iFirst, iLast, ie

      ! rhsST -> newton%b1
      call FillVectorST( newton%b1(1:state%nsize), eta )

      ! the work vector has the full length of newton%b1
      nTotal = size( newton%b1 )
      allocate( work(nTotal) )

      ! only the ILU preconditioned solver is supported here
      if (state%linSolver%name /= 'GMRES_ILU' ) then
         stop 'localNewtonEstimate only for ILU precond'
      endif

      select case ( state%time%disc_time )
      case ( 'STDG' )
         call bMViLUprodST( work, newton%b1, nTotal )
      case default
         stop 'Local damping not tested for BDF'
         ! never executed because of the stop above, kept for the BDF variant
         call bMViLUprod( work, newton%b1, nTotal )
      end select

      ! newton%res is already computed, but it is needed element by element:
      ! take the l2(K) norm of the part of the work vector belonging to K
      do ie = 1, grid%nelem
         elemK => grid%elem(ie)

         nLocal = elemK%dof * elemK%Tdof *ndim
         iFirst = elemK%ncv
         iLast  = iFirst + nLocal - 1

         associate ( wK => work(iFirst:iLast) )
            ! use sqrt or not???
            estim(ie) = dot_product( wK, wK )**0.5
         end associate
      end do

      deallocate( work )

   endsubroutine localNewtonEstimate

end module newton_mod
