!> aposteriori error estimation based on the solution of local Neumann problems
!> for meshes with hanging nodes, we create a conforming sub-triangulation gridN
module neumann_st_estim
  use paramets
!  use main_data
!  use ama_L2interpol
  use data_mod ! contains type(mesh) :: gridN for computation
  use mesh_oper
  use problem_oper
  use euler_problem
  use rav_tho_nedelec
  use loc_rav_tho_ned
  use eval_rav_tho_ned
  use dual_estim
  use inviscid_fluxes
  use f_mapping
  use blocks_integ
  use basis
  use eval_sol
  use errorDual
  use submesh
  use eval_jumps
  use regularity
  use model_oper
  use neumann_estim
  use rtn_st_mod
  use stdgm_mod
  
  implicit none

  public:: ComputeRTNstEstimNeumann
  public:: LocalNeumannVertexProblem
  public:: Eval_DGphi_elem
  public:: Eval_RTNphi_elem
  public:: Eval_tau_g_elem
  public:: Integ_dot_product_RTNphi_elem
  public:: Integ_divRTNphi_DGphi_elem
  public:: ComputeLocRTNMomentumsElem2
  public:: Eval_p_rob_estims
  public:: Eval_BC_estim

contains
  !> evaluate the error estimates based on the Helmholtz decomposition
  subroutine  ComputeRTNstEstimNeumann( estim , tQnum )
    real, dimension(1:max_eta,1:ndim), intent(out) :: estim
    integer, intent(in) :: tQnum
    class(element), pointer :: elem, elem1
    logical, dimension(:), allocatable :: inner ! inner vertex
    integer, dimension(:), allocatable :: N  ! number of elements sharing a vertex
    integer, dimension(:,:,:), allocatable :: supp !list of corresponding elements
    real, dimension(:,:), allocatable :: rez2 ! arrays for the correction of EE due to HG nodes
    integer :: i_var, is
    integer :: maxdeg = 20
    integer :: i,j, k, dof, l1, l2
    real :: normF, normS, pi
    logical:: CreateSubmesh
    
    print*, ' subroutine  ComputeRTNstEstimNeumann has started '
    
    pi = 2 * asin(1.)


    state%nlSolver%implicitly = .false.
    call ComputeST_Terms( .false. )


    ! preparation of arrays for the right-hand sides
    call Prepare_RTNst_Neumann( ) 
    
    do i=1,grid%nelem
       elem => grid%elem(i)

       ! elem%wSS(1, 0, :) = actual solution
       ! elem%wSS(1, 1, :) = p-1 projection
       ! elem%wSS(1, 2, :) = p-2 projection
       allocate(elem%wSS(1:1, 0:2, 1:elem%dof*ndim)) 

       ! original solution
       elem%wSS(1, 0, 1:elem%dof * ndim)  = elem%w(0, 1:elem%dof * ndim) 

       ! should be the projection to lower degree
       !elem%wSS(1, 0, 1:elem%dof * ndim)  = elem%w(0, 1:elem%dof * ndim) 

       !elem%wSS(1, 0, :) = 0.
       !elem%wSS(1, 0, 1: elem%deg * (elem%deg+1) / 2)  =  elem%w(0,1: elem%deg * (elem%deg+1) / 2 ) 

       call Energy_Elem_projection( elem)

       !call PlotElemFunction3D(10+state%space%adapt%adapt_level, elem, elem%dof, elem%ws(1, 1:elem%dof))
       !call PlotElemFunction3D(20+state%space%adapt%adapt_level, elem, elem%dof, elem%ws(0, 1:elem%dof))

       !if(i <= 1 .or. i == -29) then
       !   write(*,'(a8, 40es12.4)') 'w  ',elem%w( 0, :)
       !   write(*,'(a8, 40es12.4)') 'ws 0',elem%wSS(1, 0, :)
       !   write(*,'(a8, 40es12.4)') 'ws 1',elem%wSS(1, 1, :)
       !   write(*,'(a8, 40es12.4)') 'ws 2',elem%wSS(1, 2, :)
       !   print*,'--------------------------------------'
       !endif
    enddo

    !stop "e3eu387ed38"


    !######################################################################################
    CreateSubmesh = .false.

    ! contains the grid some HG nodes?
    select type (grid)
    type is (MeshHG_t)
       CreateSubmesh = .true.
    end select

    if(CreateSubmesh) then
       ! we construct a simplicial subrid gridN
       call Create_Conforming_Subgrid(grid, gridN)

       print*,'LIFTING??'
       call Eval_Lifting(gridN)

    else
       ! gridN is identical with grid
       gridN => grid
    endif


    ! do i=1, gridN%nelem
    !    elem1 => grid%elem(i)
    !    elem => gridN%elem(i)
    !    write(*,'(a20, i5, 300es12.4)') 'elemN',i,elem1%w(0, :)
    !    write(*,'(a20, i5, 300es12.4)') 'elem',i,elem%w(0, :)
    !    print*
    !    write(*,'(a20, i5, 300es12.4)') 'elemN',i,elem1%wSS(1, 0, :)
    !    write(*,'(a20, i5, 300es12.4)') 'elem',i,elem%wSS(1, 0, :)
    !    print*
    !    write(*,'(a20, i5, 300es12.4)') 'elemN',i,elem1%wSS(1, 1, :)
    !    write(*,'(a20, i5, 300es12.4)') 'elem',i,elem%wSS(1, 1, :)
    !    print*,'-----------------------------------------------------------'
    ! enddo


    !######################################################################################

    print*,' # starting of ComputeLocalNeumannEstim( )'
    ! create a list of elements sharing a vertex
    allocate( inner(1:gridN%npoin), N(1:gridN%npoin))
    allocate( supp(1:gridN%npoin, 1:maxdeg, 1:2) )
    allocate( rez2(1:gridN%npoin, 1:3) )

    call SeekVertexSupports(gridN, maxdeg, inner, N, supp)

    !write(201,*) '***********************', state%space%adapt%adapt_level
    !write(301,*) '***********************', state%space%adapt%adapt_level
    !write(401,*) '***********************', state%space%adapt%adapt_level

    ! ! graphical verification
    ! do i=1,gridN%npoin
    !    write(*,'(a4,2i5,l3, 30i5)' ) '###',i,N(i),inner(i), supp(i,1: N(i) )

    !i = 15
    !do j=1, abs(N(i))
    !   elem => gridN%elem(supp(i,j,1))
    !
    !   write(2000+i, *) gridN%x(i,:)
    !   write(2000+i, *) elem%xc(1:2)
    !   write(2000+i, '(x)' )
    !enddo
    ! enddo

    !print*,'MODIFY 3 times  #################################'

    do i=1,gridN%nelem
       elem => gridN%elem(i)
       dof = DOFtriang( MaxDegreeImplemented ) !???
          ! degree of reconstruction depends also on the neighbour

       allocate( elem%RTNphi(1:dof, 1:5) ) ! flux reconstruction on elem in DG basis functions

       elem%eta( : , :) = 0.
    enddo


    !do i_var = 1, 0, -1   !!! 2 => p-2 , 1 => p-1,  0 => p
    do i_var = 0, 0        !!! 2 => p-2 , 1 => p-1,  0 => p
       do i=1,gridN%nelem
          elem => gridN%elem(i)

          elem%RTNphi(:,:) = 0.
          elem%RTNflux_deg = 0
          
          elem%eta(1:P_s_p4, :) = 0.

       enddo
       
       print*, '! solution of local Neumann problem for each vertex'
       do i=1,gridN%npoin
          !do i=21, 21
          call LocalNeumannSTVertexProblem(i_var, i, inner(i), N(i), supp(i, 1:N(i),1:2), rez2(i, 1:3))
       enddo

       print*,'ATTENTION IN WERTYSDF stopped here'
       stop
       
       ! compute ESTIMATES
       state%estim(:, :)  = 0.
       do i=1,gridN%nelem
          elem => gridN%elem(i)

          ! correction due to presence of HG nodes
          elem%eta(P_HG, :) = 0.
          if(CreateSubmesh) then
             do j=1, elem%flen
                k = elem%face(idx, j)
                elem%eta(P_HG, 1)  = elem%eta(P_HG, 1)  + rez2(k,3)**2 * rez2(k,2)
             enddo
             elem%eta(P_HG, 1)  = sqrt(elem%eta(P_HG, 1)) * 4 * elem%diam / pi
          endif

          call Eval_p_rob_estims(i_var, elem, normF, normS)

          !state%estim(P_Flux:P_potP, :) = state%estim(P_Flux:P_potP, :) + elem%eta(P_Flux:P_potP, :)**2

          !state%estim(P_F_p1:P_F_p4, :) = state%estim(P_F_p1:P_F_p4, :) &
           !    + (elem%eta(P_F_p1:P_F_p4, :)*normF)**2

          !state%estim(P_s_p1:P_s_p4, :) = state%estim(P_s_p1:P_s_p4, :) &
          !     + (elem%eta(P_s_p1:P_s_p4, :)*normS)**2

          !state%estim(:, :) = state%estim(:, :) + elem%eta(:, :)**2

          !if(i==1) &
          !     print*,'##S#E', state%estim(P_tot, 1),  state%estim(P_potP, 1), &
          !     write(*,'(a6,120es9.1)') 'EDE#@', state%estim(:,1)
          !print*

          if(i_var == 0) deallocate( elem%RTNphi )
       enddo
    !print*,'##S#E', state%estim(P_pot, 1),  state%estim(P_potP, 1)

       write(*,'(a2, 6(a10, es9.2))') 'Ee',&
            'P_Rez = ', state%estim(P_rez, 1)**0.5 , &
            'P_Flux = ', state%estim(P_flux, 1)**0.5 , &
            'P_FR = ', state%estim(P_FR, 1)**0.5 , &
            'P_pot = ', state%estim(P_pot, 1)**0.5, &
            'P_BC = ', state%estim(P_BC, 1)**0.5, &
            'P_tot = ', state%estim(P_tot, 1)**0.5
       !print*,state%estim(P_potP, 1)**0.5  !, gridN%elem(1)%eta(P_potP, 1)
    enddo ! i_var

    deallocate( inner, N, supp, rez2 )



    !######################################################################################
    ! setting back the error estimates and results
    if(CreateSubmesh) then
       ! estims to elems
       call Set_estim_from_Subgrid(grid, gridN)

       ! deallocation of gridN
       call Delete_Submesh(gridN)
       deallocate(gridN)
    endif
    !######################################################################################



    ! values for adaptation
    do i = 1, grid%nelem
       elem => grid%elem(i)
       elem%eta(resST, 1) =  elem%eta(P_tot, 1)

       call Set_Elem_regularity(elem)
    enddo

    do i=1,grid%nelem
       elem => grid%elem(i)
       deallocate(elem%wSS)

       deallocate(elem%rtn_flux) 
       deallocate(elem%rtn_radauD) 

    enddo



    !call  Regularity_Smoothing( )


100 format(a6,2es12.4,'|',14es12.4)
!    write(22,100) &
!         '$$$',state%space%h, state%time%tau(1), estimL(Hrez:Heta1, 1:ndim)



    !print*,' # finish of ComputeLocalNeumannEstim( )', state%estim(P_tot, :)**0.5

  end subroutine ComputeRTNstEstimNeumann




  !> solve the local Neumann problem on the patch corresponding to a vortex of the mesh
  subroutine LocalNeumannSTVertexProblem(i_var, ip, inner, N, supp, rez2 )
    integer, intent(in) :: i_var   ! index of variant
    integer, intent(in) :: ip      ! index of node
    logical, intent(in) :: inner   ! inner vertex?
    integer, intent(in) :: N   ! number of elememnts sharing vertex
    integer, dimension(1:N,1:2), intent(in) :: supp   !idx of elems sharing vertex
    real, dimension(1:3), intent(inout) :: rez2   ! arrays for the correction of EE due to HG nodes
    class(element), pointer :: elem
    type(volume_rule), pointer :: V_rule
    class(Time_rule), pointer :: T_rule
    integer :: st_dN, st_dNR, st_dNF         ! dimension of the space-time Neumann problem
    integer :: dN, dNR, dNF         ! dimension of the Neumann problem
    integer :: dN_pt, dNR_pt, dNF_pt     ! dimension of the Neumann problem
    real, dimension (:,:), allocatable  :: TT  ! ONLY time matrix 
    real, dimension (:,:), allocatable  :: stA, stB  ! space-time matrix for Neumann local problem
    real, dimension (:,:), allocatable  :: A, B  ! matrix for Neumann local problem
    real, dimension (:,:), allocatable  :: A_pt, B_pt  ! matrix for Neumann local problem

    real, dimension (:,:), allocatable  :: rhs, x ! RHSs and sols for Neumann local problem
    real, dimension (:,:), allocatable  :: rhs_pt, x_pt ! RHSs and sols for Neumann local problem
    real, dimension (:,:,:), allocatable  :: DGphi ! DG basis functions
    real, dimension (:,:,:,:), allocatable  :: RTNphi ! RTN basis functions
    real, dimension (:,:), allocatable  :: tau_g , xi ! RHS
    real, dimension (:,:), allocatable  :: Fx, A_RR, A_RD, uD
    real, dimension (:,:,:), allocatable  :: sigma
    integer, dimension (:,:, :), allocatable  :: itrans
    real :: area
    integer :: i, i_n, i_p, j, j1, k, kk, iBC, ie0 , k1, k2, k3 !, l1, l2
    integer :: F_face, F_vol, F_tot, F_size, iptest
    integer :: Qdof, Qnum, Qdeg, Tdof, tQnum, TQdof, ie, ie1, ie2
    integer :: deg, deg1, Fdof, Rdof, info
    logical :: innerR

    iptest = 1

    !innerR = .true.  ! simplified modification for non-homogenous BC
    innerR = inner   !  a rigorous modification for non-homogenous BC

    rez2(:) = 0.

    !if(state%model%idiff == 9 ) then
    ! Neumann boundary condition 
    if(state%model%icase == 63) then
       if( grid%x(ip, 1) < 1E-5 .and. grid%x(ip, 2) > - 1E-5  &
         .and. grid%x(ip, 2) < 24 + 1E-5 ) then
          !print*,'Neuman BC:',  grid%x(ip, 1:2)
          innerR = .true.
       endif
    ! Neumann boundary condition 
    else if(state%model%icase == 67) then
       if( grid%x(ip, 1) < 1E-5 .and. grid%x(ip, 2) > - 1E-5  &
         .and. grid%x(ip, 2) < 1 + 1E-5 ) then
          !print*,'Neuman BC:',  grid%x(ip, 1:2)
          innerR = .true.
       endif
    endif


    !if(inner) then ! inner vortex

    deg = maxval(gridN%elem(supp(1:N,1))%deg) ! setting of p of reconstruction

    if(i_var == 2)  deg = deg - 2   ! low order reconstruction for hp-variant
    if(i_var == 1)  deg = deg - 1   ! low order reconstruction for hp-variant

    !if(ip <= 2)
    !print*,'!! ATTENTION, TEST in neumann.f90 !!!! ', deg
    if(deg <= 0) return

    !deg = 2
    !deg = 1
    !deg = 0

    deg1 = deg + 1

    Rdof = DOFtriang( deg )
    Fdof = SetRTNdof( deg)

    !! Tdof = dof,  w. r. t. time, it is constant for all elements,
    !! tQnum == index of quadrature in time
    !Tdof = grid%elem(1)%Tdof
    Tdof =  state%time%deg + 1
    tQnum = state%time%Qnum
    allocate(TT(1:Tdof, 1:Tdof), source = 0.0 ) 

    T_rule => state%time%T_rule(tQnum)
    TQdof = T_rule%Qdeg

    print*,'###ti tr', Tdof, tQnum, TQdof

    do k=1, TQdof
       do i = 1, Tdof
          do j = 1, Tdof  
             TT(i, j) = TT(i, j) + T_rule%phi(i,k)*T_rule%phi(j,k) * T_rule%weights(k)
          enddo
       enddo
    enddo

    TT(1:Tdof, 1:Tdof) = TT(1:Tdof, 1:Tdof) * state%time%tau(1)

    do i=1,Tdof
       write(*,'(a12, i5, 30es12.4)') 'time MM', i, TT(i, :)
    enddo

    
    ! setting of num_quadrature
    !Qnum = state%space%Qdeg(deg + 3, 1)
    Qnum = state%space%Qdeg( min (MaxDegreeImplemented, deg + 2), 1)
    Qdeg = Qnum

    V_rule => state%space%V_rule(Qnum)
    Qdof = V_rule%Qdof

    ! setting number of degrees of freedom of Q_h^a, V_h^a

    ! RTN DOF
    F_face = deg1         ! for one face
    F_vol  = deg * deg1   ! for one element
    F_size =  2*(deg+1) + F_vol  ! opposite is always ignored

    F_tot = F_face + F_vol

    if(inner) then
       ! inner nodes
       dNR = N * Rdof  - 1    ! Q_h^a constraint
       dNF = N * (F_face + F_vol) ! V_h^a: 1 face +  1 vol per elem

    else
       ! boundary  nodes
       dNR = N * Rdof          ! Q_h^a NO constraint
       dNF = N * (F_face + F_vol) + F_face ! open list of adjacent elemenst

       ! for potential reconstruction
       dNR_pt = N * Rdof - 1         ! Q_h^a NO constraint
       dNF_pt = N * (F_face + F_vol) - F_face ! both edgech on \partial\omega are taken off
       dN_pt  = dNR_pt + dNF_pt
    endif

    dN = dNR + dNF

    allocate(A(1:dNF, 1:dNF+2), B(1:dNF+2, 1:dNR) )
    allocate( rhs(1:dN, 1:2), x(1:dN, 1:2) )
    A(:,:) = 0.
    B(:,:) = 0.

    ! reconstruction of the potential for boundary faces has different dimension
    if(.not. innerR ) then
       allocate(A_pt(1:dNF_pt, 1:dNF_pt+2), B_pt(1:dNF_pt+2, 1:dNR_pt) )
       allocate( rhs_pt(1:dN_pt, 1:2), x_pt(1:dN_pt, 1:2) )
       A_pt(:,:) = 0.
       B_pt(:,:) = 0.

       ! nonhomogeneous Dirichlet BC
       allocate(uD(1: F_face, 1:2)  )
    endif

    !test functions on the element
    allocate(DGphi(1:N, 1:Rdof, 1:Qdof) )

    allocate(RTNphi(1:N, 1:Fdof, 1:3, 1:Qdof) )  !2nd idx: 1,2 -components, 3 = div

    allocate(A_RR(1:Fdof, 1:Fdof+2), A_RD(1:Fdof+2, 1:Rdof ) )

    allocate(tau_g(1:5, 1:Qdof), xi(1:3,1:2) )! 1st and 2nd parts of RHS

    area = sum(gridN%elem(supp(:,1))%area)

    ! itrans(:,:, 1:2) inner OR flux,  itrans(:,:,3:4)  bound AND potential
    allocate(itrans(1:N, 1: F_size, 1:4) )

    !write(*,'(a8, i5, l3, 12i5)') 'SIZES:',ip, inner, N, deg, Fdof, Rdof, dNF, dNR
    
    do i=1, N
       ie = supp(i,2)   ! inner index of the vertex

       elem => gridN%elem(supp(i,1))

       ! DG basis fucntion with the constraint \int_\omega_a \phi_i dx = 0
       call Eval_DGphi_elem(elem, V_rule, Rdof, DGphi(i, 1:Rdof, 1:Qdof), area, inner)

       ! RTN basis functions with \q \cdot \nn = 0 pn \partial \omega_a
       call Eval_RTNphi_elem(elem, V_rule, deg, Fdof, &
            RTNphi(i, 1:Fdof, 1:3, 1:Qdof), ie)

       ! right hand side of the Neumann problem
       ! triangle coordinates for gradient of \psi_a
       xi(1:3, 1:2) = gridN%x(elem%face(idx, 1:3), 1:2)
       call Eval_tau_g_elem(elem, i_var, V_rule, tau_g(1:5, 1:Qdof), supp(i,2), xi )

       call Integ_dot_product_RTNphi_elem(elem, V_rule,  Fdof, &
            RTNphi(i, 1:Fdof, 1:2, 1:Qdof), tau_g(1:5, 1:Qdof), &
            A_RR(1:Fdof, 1:Fdof+2) )

       call Integ_divRTNphi_DGphi_elem(elem, V_rule,  Fdof, Rdof,  &
            RTNphi(i, 1:Fdof, 3, 1:Qdof), DGphi(i, 1:Rdof, 1:Qdof), &
            tau_g(3, 1:Qdof), A_RD(1:Fdof+2, 1:Rdof) )

       ! correction due to the presence of HG nodes
       if(inner .and. i_var == 0) &
            rez2(1) = rez2(1) + dot_product( tau_g(3, 1:Qdof), V_rule%weights(1:Qdof)) * elem%F%JF0 / 2 
       rez2(2) = rez2(2) +  elem%F%JF0 / 2   ! area
       !endif


        ! if(ip == 1 .and. i==1 ) then
        !  !  write(89,*) gridN%nelem
        !  !  write(89,'(2i5,10es14.6)') deg, 3*(deg1+1), A_RR(1,1), A_RR(3*deg1+1, 3*deg1+1)
       ! write(*,*) 'deg = ', deg, 'RTN matrix  ', 1,'..', 3*(deg+1),' *** ||  *** ',3*(deg+1) +1,'...', Fdof
       !     write(*,'(a6,400i12)') '   ',1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
       !      do j=1,Fdof
       !         write(*,'(2i5,400es12.4)')i,j,A_RR(j,:)
       !      enddo
       !      print*,'--------------  A_RR matrix',i, A(1,2)
       !      stop
        ! endif

       ! if(ip == 1) then
       !     do j=1,Fdof+1
       !        write(*,'(2i3,20es12.4)')i,j,A_RD(j,:)
       !     enddo
       !     print*,'--------------  A_RG matrix'
        ! endif

       call CreateTransfPairs(deg, F_size, N, i, ie, itrans(i, 1: F_size, 1:4), inner )

       !print*,'-------A1',ip, i, elem%i
       call AssembMatrixRTN_RTN(A(1:dNF, 1:dNF+2), A_RR(1:Fdof, 1:Fdof+2), &
            dNF, Fdof, F_size, itrans(i, 1: F_size, 1:2) )

       !if(ip == 1) call WriteArray(A, dNF, dNF+2, 1, dNF, 1, dNF+2)

       !print*,'-------A2',ip

       call AssembMatrixRTN_DG(B(1:dNF+2, 1:dNR), A_RD(1:Fdof+2, 1:Rdof), &
            dNF, dNR, Fdof, Rdof, F_size, itrans(i, 1: F_size, 1:2), i, N, inner)


       !if(ip == 1) call WriteArray(B, dNF+1, dNR, 1, dNF+1, 1, dNR)


       if(.not. innerR) then
          ! setting of nonhomogeneous Dirichlet BC
          !if(ip == iptest) print*,'###',ip,elem%i,i,supp(i,1:2)
          if(i == 1 ) then
             !call Eval_Dir_BC(elem, deg, F_face, ie, .true., uD(1:F_face, 1) )
             call Eval_Dir_BC_L2proj(elem, deg, F_face, ie, .true., uD(1:F_face, 1) )
             !if(ip == 1) write(*,'(a10,i5,8es12.4)') 'proj 1',i,uD(1:F_face, 1)

             ! adding to the RHS
             ie0 = ie
             do k=1,F_face
                kk = (ie0 -1) * F_face + k
                !print*,'i=1',k, kk
                A_RR(1:Fdof, Fdof+2) = A_RR(1:Fdof, Fdof+2) - uD(k, 1) *  A_RR(1:Fdof, kk )
                A_RD(Fdof+2, 1:Rdof ) = A_RD(Fdof+2, 1:Rdof ) - uD(k,1) * A_RD(kk, 1:Rdof)
             enddo
          endif

          if(i == N ) then
             !call Eval_Dir_BC(elem, deg, F_face, ie, .false., uD(1:F_face, 2) )
             call Eval_Dir_BC_L2proj(elem, deg, F_face, ie, .false., uD(1:F_face, 2) )
             !if(ip == 1) write(*,'(a10,i5,8es12.4)') 'proj N',i,uD(1:F_face, 2)

             !stop

             ! adding to the RHS
             ie1 = mod(ie , 3) + 1
             ie0 = mod(ie1, 3) + 1
             do k=1,F_face
                kk = (ie0 -1) * F_face + k
                !print*,'i=N',k, kk
                A_RR(1:Fdof, Fdof+2) = A_RR(1:Fdof, Fdof+2) - uD(k, 2) *  A_RR(1:Fdof, kk )
                A_RD(Fdof+2, 1:Rdof ) = A_RD(Fdof+2, 1:Rdof ) - uD(k,2) * A_RD(kk, 1:Rdof)
             enddo

          endif


          !print*,'-------A3',ip
          call AssembMatrixRTN_RTN(A_pt(1:dNF_pt, 1:dNF_pt+2), A_RR(1:Fdof, 1:Fdof+2), &
               dNF_pt, Fdof, F_size, itrans(i, 1: F_size, 3:4) )

          !if(ip == 1) then
          !   print*,'-------A3',ip, i, N
          !   call WriteArray(A_pt, dNF_pt, dNF_pt+2, 1, dNF_pt, 1, dNF_pt+2)
          !endif

          !print*,'-------A5',ip

          call AssembMatrixRTN_DG(B_pt(1:dNF_pt+2, 1:dNR_pt), A_RD(1:Fdof+2, 1:Rdof), &
               dNF_pt, dNR_pt, Fdof, Rdof, F_size, itrans(i, 1: F_size, 3:4), i, N, .true.)


          !if(ip == 1) then
          !   print*,'-------A5',ip, i, N
          !   call WriteArray(B_pt, dNF_pt+1, dNR_pt, 1, dNF_pt+1, 1, dNR_pt)
          !endif

       endif

       !!if(ip == 1)


       !stop 'stopped ERTGVBNHG'

       !  allocate( Fx(1:Qdof, 1:nbDim) )
       ! !integration nodes on K
       !  call ComputeF(elem, Qdof, V_rule%lambda(1:Qdof,1:nbDim), &
       !       Fx(1:Qdof, 1:nbDim) )

       !  do j=1,Qdof
       !     write(70+i,*) V_rule%lambda(j, 1:nbDim), Fx(j,1:2),DGphi(i,1:Rdof,j)
       !     write(100+i,*)Fx(j,1:2), RTNphi(i,1:Fdof,1,j)
       !     write(200+i,*)Fx(j,1:2), RTNphi(i,1:Fdof,2,j)
       !     write(300+i,*)Fx(j,1:2), RTNphi(i,1:Fdof,3,j)
       !     write(400+i,*)Fx(j,1:2), tau_g(1:3,j)
       !  enddo

       !  deallocate(Fx)


    enddo   ! do i=1,N


    if(ip == 1) then
       call WriteArray(A, dNF, dNF+1, 1, dNF, 1, dNF+1)
       ! if(ip == 15)
       call WriteArray(B, dNF+1, dNR, 1, dNF+1, 1, dNR)
    endif

    
    ! setting of the matrices of the space-time Neumann problem
    st_dNF = dNF * Tdof
    st_dNR = dNR * Tdof
    st_dN  = dN  * Tdof

    write(*,'(a10, i8, l3, 18i5)') 'matrix dims', &
         ip, inner, N, deg, dNF, dNR, dN, st_dNF, st_dNR, st_dN


    allocate(stA(1:st_dNF, 1:st_dNF+2), stB(1:st_dNF+2, 1:st_dNR) )
    !allocate( rhs(1:st_dN, 1:2), x(1:sT_dN, 1:2) )
    stA(:,:) = 0.
    stB(:,:) = 0.


    ! privisional setting of stA, stB
    do i= 1,Tdof
       do j= 1,Tdof
          k1 = (i-1)*dNF
          k2 = (j-1)*dNF
          k3 = (j-1)*dNR
          stA(k1+1:k1+dNF, k2+1:k2+dNF) = TT(i,j) * A(1:dNF, 1:dNF)
          stB(k1+1:k1+dNF, k3+1:k3+dNR) = TT(i,j) * B(1:dNF, 1:dNR)
       enddo
    enddo
       
    !if(.not. innerR ) stop 'Stopped in WQRTY'
    print*,'#############################################################'
    !if(ip == 15 ) stop 'Stopped in WQRTY'


    if(ip == 1) then
       call WriteArray(stA, st_dNF, st_dNF, 1, st_dNF, 1, st_dNF)
       ! if(ip == 15)
       call WriteArray(stB, st_dNF, st_dNR, 1, st_dNF, 1, st_dNR)
    endif
    
    rhs(:,:) = 0.
    ! setting of RHS for flux
    rhs(    1 : dNF     , 1 ) =  A(1:dNF, dNF + 1 )
    rhs(dnF+1 : dNF +dNR, 1 ) =  B(dNF + 1, 1:dNR )

    ! setting of RHS for potential
    rhs(    1 : dNF     , 2 ) =  A(1:dNF, dNF + 2 )
    rhs(dnF+1 : dNF +dNR, 2 ) =  B(dNF + 2, 1:dNR )
    !write(*,'(a6,40es12.4)') 'rhsB', rhs(dnF+1 : dNF +dNR, 2 )

    !write(201,*) 'Schur:', ip, innerR, 2, gridN%x(ip, 1:2), dNF, dNR
    !call SchurComplements(dNF, dNR, A(1:dNF, 1:dNF), -B(1:dNF, 1:dNR), 2, &
    !     rhs(1:dNF, 1:2), rhs(dNF+1:dNF+dNR, 1:2), &
    !     x(1:dNF, 1:2), x(dNF+1:dNF+dNR, 1:2)  )


    !call SchurComplements( &
    call SchurComplementsNEW( state%space%adapt%adapt_level, ip,  N, deg, gridN%x(ip,1:2), &
    !call SchurComplements_ITER( &
         dNF, dNR, A(1:dNF, 1:dNF), -B(1:dNF, 1:dNR), 2, &
         rhs(1:dNF, 1:2), rhs(dNF+1:dNF+dNR, 1:2), &
         x(1:dNF, 1:2), x(dNF+1:dNF+dNR, 1:2)  )


    !write(*,'(a6,50es11.3)') 'A x1:', x(1:dN,1)
    !write(*,'(a6,50es11.3)') 'A x2:', x(1:dN,2)

    ! potential reconstruction for potential, different algebraic problem
    if(.not. innerR) then
       rhs_pt(:,:) = 0.
       ! setting of RHS for flux no necessary
       !rhs_pt(       1 : dNF_pt        , 1 ) =  A_pt(  1:dNF_pt, dNF_pt + 1 )
       !rhs_pt(dnF_pt+1 : dNF_pt +dNR_pt, 1 ) =  B_pt(dNF_pt + 1, 1:dNR_pt )

       ! setting of RHS for potential
       rhs_pt(   1 : dNF_pt     , 2 ) =  A_pt(1:dNF_pt, dNF_pt + 2 )
       rhs_pt(dnF_pt+1 : dNF_pt +dNR_pt, 2 ) =  B_pt(dNF_pt + 2, 1:dNR_pt )
       !write(*,'(a6,40es12.4)') 'rhsB', rhs_pt(dnF_pt+1 : dNF_pt +dNR_pt, 2 )


       !print*,'---------',ip
       !call WriteArray(A_pt, dNF_pt, dNF_pt+2, 1, dNF_pt, 1, dNF_pt+2)
       !call WriteArray(B_pt, dNF_pt+1, dNR_pt, 1, dNF_pt+1, 1, dNR_pt)

       !write(201,*) 'Schur:', ip, innerR, 1, gridN%x(ip, 1:2), dNF_pt, dNR_pt
       ! call SchurComplements(dNF_pt, dNR_pt, A_pt(1:dNF_pt, 1:dNF_pt), &
       !      -B_pt(1:dNF_pt, 1:dNR_pt), 1, &
       !      rhs_pt(1:dNF_pt, 2), rhs_pt(dNF_pt+1:dNF_pt+dNR_pt, 2), &
       !      x_pt(1:dNF_pt, 2), x_pt(dNF_pt+1 : dNF_pt+dNR_pt, 2)  )

       !call SchurComplements( &
       call SchurComplementsNEW( state%space%adapt%adapt_level, -ip, N, deg, gridN%x(ip,1:2), &
       !call SchurComplements_ITER( &
            dNF_pt, dNR_pt, A_pt(1:dNF_pt, 1:dNF_pt), &
            -B_pt(1:dNF_pt, 1:dNR_pt), 1, &
            rhs_pt(1:dNF_pt, 2), rhs_pt(dNF_pt+1:dNF_pt+dNR_pt, 2), &
            x_pt(1:dNF_pt, 2), x_pt(dNF_pt+1 : dNF_pt+dNR_pt, 2)  )

       !write(*,'(a6,50es11.3)') 'B x1:', x_pt(1:dN_pt,1)
       !write(*,'(a6,50es11.3)') 'B x2:', x_pt(1:dN_pt,2)

    endif

    !if(ip == 15) then
    !print*,'**********************************   AFTER SCHUR'
    !  if(ip == 15) call WriteArray(B, dNF+1, dNR, 1, dNF+1, 1, dNR)
    !    print*
    !    write(*,'(a4,50es11.3)') 'Bx2:', &
    !         matmul(transpose(B(1:dNF, 1:dNR)), x(1:dNF,2))
    ! endif

    ! setting of the solution sigma in integ. nodes
    allocate( sigma(1:N, 1:2, 1:Qdof) )

    do i=1, N
       elem => gridN%elem(supp(i,1))
       ie = supp(i,2)   ! inner index of the vertex

       !if(elem%i == 1) write(*,'(a8,3i5,300es12.4)') '3e0l-',elem%i, i, ie,  x_pt(1:dNF_pt, 2)
        
       if(innerR) then
          ! both flux and potential reconstruction
          !write(*,'(a10,10i5)') 'All',i,elem%i,ie
          call AssociateElementReconstruction(elem, N, dNF, Fdof, V_rule, ip, i, ie, deg, 2, &
               F_size, itrans(i, 1: F_size, 1:2), &
               x(1:dNF, 1:2), RTNphi(i, 1:Fdof, 1:3, 1:V_rule%Qdof), F_face )
       else
          ! flux reconstruction
          !if(N==1) write(*,'(a10,10i5)') 'flux',i,elem%i,ie
          call AssociateElementReconstruction(elem, N, dNF, Fdof, V_rule, ip, i, ie, deg, 1, &
               F_size, itrans(i, 1: F_size, 1:2), &
               x(1:dNF, 1), RTNphi(i, 1:Fdof, 1:3, 1:V_rule%Qdof), F_face )

          !if(N==1) write(*,'(a10,10i5)') 'potent',i,elem%i,ie

          ! potential  reconstruction
          if(i == 1 .or. i == N ) then
             ! adding of the non-homogeneous Dirichlet BC
             call AssociateElementReconstruction(elem, N, dNF_pt, Fdof, V_rule, ip, i, ie, deg, -1, &
                  F_size, itrans(i, 1: F_size, 3:4), &
                  x_pt(1:dNF_pt, 2), RTNphi(i, 1:Fdof, 1:3, 1:V_rule%Qdof), F_face, uD(1:F_face, 1:2) )
             ! adding of the non-homogeneous Dirichlet BC  -------------------------^^^^^^^^^^^^^^^^^
          else

             call AssociateElementReconstruction(elem, N, dNF_pt, Fdof, V_rule, ip, i, ie, deg, -1, &
                  F_size, itrans(i, 1: F_size, 3:4), &
                  x_pt(1:dNF_pt, 2), RTNphi(i, 1:Fdof, 1:3, 1:V_rule%Qdof), F_face )
          endif

       endif

    enddo

    !if(inner) 
    rez2(3) = rez2(1) / rez2(2)

    !if(abs(rez2(3)) > 1E-10) write(*,'(a8, i5, l3, 6es12.4)') 'REZ2:', ip, inner, rez2(1:3), grid%x(ip, 1:2)
    !stop "ed3e3e3"
    
    deallocate(itrans)

    deallocate(A_RR, A_RD)
    deallocate(DGphi, RTNphi )
    deallocate(rhs, x)
    deallocate(A, B)
    deallocate(tau_g, sigma)
    deallocate(stA, stB)
    deallocate(TT)
    
    if(.not. innerR ) then
       deallocate(A_pt, B_pt, rhs_pt, x_pt, uD)
    endif

    !if(.not. inner) then
    !if(ip == 3) then
    !if( inner) then
    !print*,'stopped in LocalNeumannVertexProblem, ip = ',ip
    !   stop
    !endif

    !endif
    
  end subroutine LocalNeumannSTVertexProblem


  !> evaluate the space fluxes and Radau reconstruction in integ nodes,
  !> they will be used in the local space-time Neumann problems
  subroutine  Prepare_RTNst_Neumann( )
    class(element), pointer :: elem
    real, dimension(:,:,:), allocatable :: reconstr, reconstr_err
    type(volume_rule), pointer :: V_rule
    class( Time_rule), pointer :: T_rule
    real, dimension(:,:), allocatable :: Fx

    integer :: i, j, l, dof, Tdof, Qdof, tQnum, tdeg !, TQdof
    

    ! degree of the polynomial Radau reconstruction
    tdeg =  state%time%deg + 1

    T_rule => state%time%T_rule( tdeg )
    select type ( T_rule )
    type is ( RadauTime_rule )
       !print*, 'time%deg=', state%time%deg, ' # nodes: ',T_rule%Qdeg

       if ( .not. allocated( T_rule%RadauPol ) ) &
            call T_rule%evalRadauPolynomial()
       !call T_rule%testRadauPol()

    type is ( GaussTime_rule )
       stop 'Radau polynomial cannot be computed with Gauss quadrature'

    class default
       stop ' Abstract Time_rule - should be specified (Radau or Gauss) '
    end select

    do i=1, grid%nelem
       elem => grid%elem(i)
       dof = elem%dof
       Qdof = elem%Qdof
       Tdof = elem%Tdof

       tQnum = state%time%Qnum
       !T_rule => state%time%T_rule(tQnum)

       ! allocattion of the arrays for the storing of fluxes and Radau reconstruction
       allocate(elem%rtn_flux(1:Qdof, 1:tQnum, 1:nbDim, 1:ndim), source = 0.0)
       allocate(elem%rtn_radauD(1:ndim, 1:Qdof, 1:tQnum), source = 0.0)

       ! local arrays
       allocate(reconstr(1:ndim, 1:dof, 1:Tdof+1), source = 0.0)
       allocate(reconstr_err(1:ndim, 1:dof, 1:Tdof+1), source = 0.0)

       ! Radau reconstruction in DG basis functions
       call evalRadauReconstruct( elem , reconstr(1:ndim, 1:dof, 1:Tdof+1), &
            reconstr_err(1:ndim, 1:dof, 1:Tdof+1 ) )

       ! evaluate || \vartheta(w) - R ||
       elem%eta( RTNradau2, 1:ndim ) = &
            evalL2STNorm_Elem( elem, state%time%tau(1), ndim, elem%dof, Tdof+1, &
            reconstr_err(1:ndim, 1:dof, 1:Tdof+1 )  )


       ! time derivatives of the Radau reconstruction in integ nodes
       do j = 1, tQnum
          elem%rtn_radauD(1:ndim, 1:Qdof, j) = evalSTfunInIntTime_Der( elem,  state%time%tau(1), &
               ndim, dof, Tdof+1, reconstr(1:ndim, 1:elem%dof, 1:Tdof + 1 ), j, tQnum )
       end do ! j 


       
       if(state%modelName == 'porous' ) then
          call ComputePhysicalFluxes( elem, tQnum, &
               Set_R_s_porous, Set_K_sk_porous, Set_f_s_empty,  Set_Ppm_empty, Set_S_empty)
       else
          print*,'The model', state%modelName,' is not implemented'
          stop
       endif
       
          
       deallocate(reconstr, reconstr_err)


       V_rule => state%space%V_rule(elem%Qnum)
       if(V_rule%Qdof /= Qdof) stop "f943u933o"
       allocate( Fx(1:Qdof, 1:nbDim) )

       !integration nodes on K
       call ComputeF(elem, Qdof, V_rule%lambda(1:Qdof,1:nbDim), Fx(1:Qdof, 1:nbDim) )
       do j = 1, tQnum
          do l=1,Qdof
             write(100+j, *) Fx(l, 1:2), elem%rtn_radauD(1:ndim, l, j), &
                  elem%rtn_flux(l, j, 1:nbDim, 1:ndim)
          enddo
       enddo
       deallocate(Fx)
       
    enddo  ! do i=1, grid%nelem
    stop "subroutine Prepare_RTNst_Neumann"
    
  end subroutine Prepare_RTNst_Neumann




  subroutine ComputePhysicalFluxes( elem , tQnum, Set_R_s, Set_K_sk, Set_f_s, Set_Ppm, Set_S )
    interface
       subroutine Set_R_s(ndimL, nbDim, iRe, Qdof, w, Dw, Re_1, R_s, xi)
         integer, intent(in) :: ndimL, nbDim, iRe, Qdof
         real, dimension(1:Qdof, 1:ndimL), intent(in):: w !state  w in #Qdof nodes
         real, dimension(1:Qdof, 1:ndimL, 1:nbDim), intent(in):: Dw !state  Dw in #Qdof nodes
         real, dimension(1:iRe, 1:Qdof), intent(in) :: Re_1        ! inverse of Reynolds number
         !real, intent(in) :: Re_1                     ! inverse of Reynolds number
         real, dimension(1:Qdof, 1:nbDim, 1:ndimL), intent(inout) :: R_s
          real, dimension(1:Qdof, 1:nbDim), intent(in):: xi ! physical coordinates
        end subroutine Set_R_s
        subroutine Set_K_sk(ndimL, nbDim, iRe, Qdof, w, Dw, Re_1, K_sk, xi)
          integer, intent(in) :: ndimL, nbDim, iRe, Qdof
          real, dimension(1:Qdof, 1:ndimL), intent(in):: w !state  w in #Qdof nodes
          real, dimension(1:Qdof, 1:ndimL, 1:nbDim), intent(in):: Dw !state  Dw in #Qdof nodes
          real, dimension(1:iRe, 1:Qdof), intent(in) :: Re_1        ! inverse of Reynolds number
          real, dimension(1:Qdof,1:nbDim,1:nbDim,1:ndimL,ndimL), intent(inout) :: K_sk
          real, dimension(1:Qdof, 1:nbDim), intent(in):: xi ! physical coordinates
        end subroutine Set_K_sk
        subroutine Set_f_s(ndimL, nbDim, Qdof, w, f_s, x, ie )
          integer, intent(in) :: Qdof, ndimL, nbDim
          real, dimension(1:Qdof, 1:ndimL), intent(in):: w !state  w in #Qdof nodes
          real, dimension(1:Qdof,1:nbDim,1:ndimL), intent(inout) :: f_s
          real, dimension(1:Qdof,1 :nbDim), intent(in) :: x
          integer, intent(in) :: ie
        end subroutine Set_f_s
        subroutine Set_Ppm( ndimL, nbDim, Qdof, w, n, xi, Ppm, one_over_area, elem, ie)
          import :: element
          integer, intent(in) :: Qdof, ndimL, nbDim
          real, dimension(1:Qdof, 1:ndimL), intent(in):: w !state  w in #Qdof nodes
          real, dimension(1:Qdof,1:nbDim,1:ndimL,1:ndimL), intent(inout) :: Ppm
          ! matrices Ppm in  -- " --
          real, dimension(1:Qdof, 1:nbDim), intent(in) :: n   ! outer normal
          real, dimension(1:Qdof, 1:nbDim),intent(in) ::  xi                    ! node on the edge?
          real, intent(in), optional :: one_over_area
          class(element), intent(inout), optional :: elem
          integer, intent( in ), optional :: ie !not used
        end subroutine Set_Ppm
        subroutine Set_S(ndimL, nbDim, Qdof, xi, w, Dw, S)
          integer, intent(in) :: ndimL, nbDim, Qdof
          real, dimension(1:Qdof, 1:nbDim), intent(in) :: xi
          real, dimension(1:Qdof, 1:ndimL), intent(in):: w !state  w in #Qdof nodes
          real, dimension(1:Qdof, 1:ndimL, 1:nbDim), intent(in):: Dw !state  Dw in #Qdof nodes
          real, dimension(1:Qdof, 1:ndimL), intent(inout) :: S
        end subroutine Set_S
     end interface
     
     class(element), intent(inout) :: elem
     integer, intent(in) :: tQnum !number of time moments
     real, dimension(:,:), allocatable :: Re_1  ! inverse of Reynolds number in integ nodes
     real, dimension(:,:), allocatable :: wi  ! w recomputed  in integration nodes
     real, dimension(:,:,:), allocatable :: Dwi  ! Dw recomputed  in integration nodes
     !real, dimension(:,:,:), allocatable :: R_s  ! R_s(w) recomputed  in integration nodes

     integer :: alpha, dof, Qdof 


     dof = elem%dof
     Qdof = elem%Qdof
     
     allocate(Re_1(1:iRe, 1:Qdof), source = 0.0 )

     if(state%model%Re > 0.) then
        Re_1(1, 1:Qdof) = 1./state%model%Re
     endif
     
     Re_1(2:iRe, 1:Qdof) = transpose( elem%xi(0, 1:Qdof, 2+1:2+iRe-1) )

     
     ! solution and its derivatives in integ nodes
     allocate(wi(1:Qdof,1:ndim) )
     allocate(Dwi(1:Qdof, 1:ndim, 1:nbDim))

     ! we go over time integration nodes
     do alpha = 1, tQnum
        
        if (state%wActual) then
           call Transfer_wST_to_wActual_Elem(elem , alpha, tQnum)
        else
           call Transfer_wST_to_w_Elem(elem , alpha, tQnum)
        endif

        ! solution in integ nodes
        call Eval_w_Elem(elem, wi(1:Qdof,1:ndim) )

        ! evaluation of the gradient of w in integ nodes
        call Eval_Dw_Elem(elem, Dwi(1:Qdof, 1:ndim, 1:nbDim) )

        ! evaluation of the fluxes
        call Set_R_s(ndim, nbDim, iRe, Qdof, wi(1:Qdof,1:ndim), Dwi(1:Qdof, 1:ndim, 1:nbDim), &
             Re_1(1:iRe, 1:Qdof), elem%rtn_flux(1:Qdof, alpha, 1:nbDim, 1:ndim), &
             elem%xi(0, 1:Qdof, 1:nbDim))

        
      end do !alpha
      
      deallocate(wi, Dwi, Re_1)

   end subroutine ComputePhysicalFluxes
      
end module neumann_st_estim

