!> parameters \f$ \eta = 1/\tau\f$ and \f$  \iota \f$ used in double problem
module matrix_oper_int
implicit none

  public:: eta, iota

  real:: eta = 0., iota(2) = (/0., 0./)

end module

!> matrix operations for sparse (block) matrices
module matrix_oper
  use lapack_oper
  use geometry
  use element_mod
  use define_state
  use main_data
  use data_mod

  implicit none

!!  public:: sMVprod            ! product of a sparse matrix and a vector OLD
  public:: MatrixNormFrobenius
  public:: null_precond              ! No preconditioner
  public:: BlockJacobi        ! block Jacobi preconditioner
  public:: bMassInvVprod      ! block inverse mass matrix - vector product
  public:: bMVprod            ! block matrix(M+tC)-vector product
  public:: bMVprodST	      ! block matrix(M+tC)-vector product in STDGM
  public:: bMVprodST_Dual	      ! block matrix(M+tC)-vector product in STDGM DUAL PROBLEM
  public:: bMVprodBIG         ! block matrix(M+tC)-vector product in STDGM using the matrix blocks bigBlock
  public:: bMVprodBIG_Dual
  public:: MGbMVprod	      ! !! be sure grid%elem%MG*** is initialized !!
  public:: bMVprod2           ! double block matrix-vector product
  public:: bMVprodA           ! block matrix(A)-vector product
  public:: bMVprodOffC        ! block matrix-vector product only off diagonal terms
  public:: bMVmassprod        ! block mass matrix - vector product
  public:: EvalSSresid        ! eval steady-state residuum := C(w)w - q(w)
  !public:: EvalSSresidDirect  ! eval steady-state residuum := f(w) (= q(w) - C(w)w )
  public:: EvalSSresidExplicit! eval steady-state residuum := f(w) (= q(w) - C(w)w )
  public:: EvalWeightTDresid  ! eval weighted Time Dependent  residuum

  public:: VectorPrecondNorm  ! "preconditioned" norm of the vector
  public:: VectorScaleNorm    ! "scaled" norm of the vector
  public:: bMVnull            ! no preconditioner
  public:: bMVdiagprod        ! diag-block matrix-vector product
  public:: bMVdiagprod2       ! double diag-block matrix-vector product
  public:: bMVdiagprodST 		! diag-block matrix-vector product for STDGM
  public:: bMVdiagprodST_Dual ! diag-block matrix-vector product for STDGM DUAL PROBLEM
  public:: bMViLUprod         ! performs ILU preconditioning
  public:: bMViLUprod2        ! performs double ILU preconditioning
  public:: bMViLUprodST 		! performs ILU preconditioning for STDGM
  public:: bMViLUprodST_Dual  ! performs ILU preconditioning for STDGM DUAL PROBLEM
  public:: bMViLUprodBIG
  public:: bMViLUprodBIG_Dual



  !public:: CopyBlocksSTtoBlockPlus ! copy bigger diagonal block to blockPlus, used for Ritz reconstruction
  public:: TaylorSolution     ! solution of (M+\tau C) w = b with Taylor serie
  public:: WriteMatrixA       ! write the matrix (M+\tau C)
  public:: WriteMatrixA_ST       ! write the matrix (1/tauM+C) for STDGM
  public:: WriteMatrixST_Blocks   ! write the nonzeroblocks of matrix (1/tau M+ C) for STDGM
  public:: WriteMatrixST       ! write the RefTimeMatrix for STDGM
  public:: test_bMVprodST     !testing bMVprodST
  public:: Write_rhsST        !write RHS for STDGM

  public:: WriteMatrixAMatlab ! write the matrix (M+\tau C) into a file (vector row indices, vector column indices, nonzero values), input for Matlab
  public:: WriteMatrixLU      ! write the matrix ILU

  public:: prodFEM            !  matrix-vector product for conforming FEM
  public:: diagFEM            !  diagonal preconditioner for conforming FEM

contains

  ! evaluate the Froenius norm of the block matrix elem%block(:)
  function MatrixNormFrobenius()
    real :: MatrixNormFrobenius
    class(element), pointer:: elem
    integer :: i,k,in,j, ndof, ndof1
    real :: val

    val = 0
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof = elem%dof*ndim

       do j=1,ndof
          val = val + dot_product(elem%block(0)%Mb(j,1:ndof), elem%block(0)%Mb(j,1:ndof))
       enddo

       do k=1,elem%flen
          in = elem%face(neigh,k)
          if(in >0) then
             ndof1 = grid%elem(in)%dof * ndim

             do j=1,ndof
                val = val + dot_product(elem%block(k)%Mb(j,1:ndof1), &
                     elem%block(k)%Mb(j,1:ndof1) )
             enddo
          endif
       enddo

    enddo

    MatrixNormFrobenius = val**0.5

  end function MatrixNormFrobenius

  subroutine WriteMatrixA(eta)
    real, intent(in) :: eta
    class(mesh), pointer :: grid_print
    class(element), pointer:: elem,elem1 ! one element
    real, dimension(:),allocatable:: accum
    integer :: i,k,in,j,j1, k1, is, is1, dof, ndof, ndof1


    grid_print => grid
    if(state%local_problem) grid_print => gridL

    allocate(accum(1:state%nsize))

    print*,'--------------  matrix 1/tau*M + C  -----------------', state%nsize
    do i=1,grid_print%nelem
       elem => grid_print%elem(i)
       is = elem%ncv
       dof = elem%dof
       ndof = dof*ndim

       !do j=1,ndof
       do k1=1,ndim
          do j1= 1, dof

             j = (k1 -1)*dof + j1

             accum(:) = 0.

             if(eta /= 0.) then
                !print*,'^^^',j,dof, is, is + (j-1)/dof *dof, is + (j-1)/dof *dof + dof-1,'||', &
                !     size(elem%Mass%Mb, 1), size(elem%Mass%Mb, 2)

                accum(is + (j-1)/dof *dof: is + (j-1)/dof *dof + dof-1) &
                     = eta * elem%Mass%Mb(j1,1:dof)
             endif

             accum(is:is+ndof-1) =  accum(is:is+ndof-1) + elem%block(0)%Mb(j,1:ndof)


             do k=1,elem%flen
                in = elem%face(neigh,k)
                if(in >0) then
                   elem1 => grid_print%elem(in)
                   ndof1 = elem1%dof * ndim
                   is1 = elem1%ncv
                   accum(is1:is1+ndof1-1) = elem%block(k)%Mb(j,1:ndof1)
                endif
             enddo

             !  write(*,'(i5,a2,100es11.3)')is+j-1,': ',accum(:)
             write(*,'(i5,a2,500es9.1)')is+j-1,': ',accum(:) !
          enddo ! j1
       enddo ! k1
    enddo ! i
    print*,'-----------end of   matrix 1/tau*M + C  -----------------'

    deallocate(accum)
  end subroutine WriteMatrixA

  subroutine test_bMVprodST()
    real, dimension(:,:), allocatable :: b
    real, dimension(:), allocatable ::   x
    class(element), pointer :: elem, elem1
    integer :: i,j,k, m, ndof,ndof1,is,is1, face

    open (58, file="bMVprodST-ADGo", action ="write", status="replace")

    allocate( b( 1:state%nsize,1:state%nsize))
    allocate( x( 1:state%nsize))
    b(:,:) = 0

    do i = 1, state%nsize
       x(:) = 0
       x(i) = 1
       call bMVprodST(b(:,i),x,state%nsize)
    enddo !i

    ! 	do i = 1,state%nsize
    ! 		write(58,'(100es10.2)'), b(i,:)
    ! 	enddo

    do i = 1, grid%nelem
       elem => grid%elem(i)
       ndof = elem%Tdof * elem%dof * ndim
       is = elem%ncv
       write(58,*), 'Element ', i
       write(58,*), 'block(0):', is, is + ndof  - 1
       do j = 1, ndof
          write(58,'(200es12.4)'), b(is + j - 1 , is : is + ndof  - 1)
       enddo !j

       do j = 1,elem%flen
          face = elem%face(neigh,j)
          if (face > 0) then
             write(58,*), 'block(',j,'):'
             elem1 => grid%elem(face)
             ndof1 = elem1%dof * ndim * elem1%Tdof
             is1 = elem1%ncv

             do m = 1, ndof
            	write(58, '(200es12.4)' ) b(is + m - 1, is1 : is1 + ndof1 - 1)
             enddo !m

          endif
       enddo !j

       write(58,*), '--------------------------------'
    enddo !i

    close(58)
     print*, ''
     print*, 'End of test_bMVprodST - it shouldnt be used for large systems!!!!!!!'
    deallocate(x)
    deallocate(b)

  end subroutine test_bMVprodST


  subroutine Write_PrimalDualSolution(ifile)
    integer, intent(in) :: ifile
    class(element), pointer :: elem, elem1
    integer :: ie, kvec,ivec,dof, k, l, ndof,Tdof

    print*,'Write_PrimalDualSolution in file "fort.',ifile,'"'
    write(ifile,*) 'File created in  subroutine Write_PrimalDualSolution(ifile), matrix.f90'
    do ie=1,grid%nelem
      elem => grid%elem(ie)
      dof = elem%dof
      Tdof = elem%Tdof

      do l = 1, elem%Tdof
         do k = 1, ndim
             write(ifile, '(a1, 3i5, 200es12.4)') 'w',elem%i, l , k ,elem%wST(k,1:dof,l)
             write(ifile, '(a1, 3i5, 200es12.4)') 'z',elem%i, l , k ,elem%zST(k,1:dof,l)
         enddo !k
      enddo !l
   end do

 end subroutine Write_PrimalDualSolution



  
  subroutine Write_rhsST()
    real, dimension(:,:), allocatable :: b
    real, dimension(:), allocatable ::   x
    class(element), pointer :: elem, elem1
    integer :: ie, kvec,ivec,dof, k, l, ndof,Tdof

    !open (58, file="rhsST", action ="write", status="replace")
    open (58, file="bMVprodST", action ="write", status='UNKNOWN', position = 'append')

!  write(58, *), 'RHS ST'
!

!
!   do ie=1,grid%nelem
!      elem => grid%elem(ie)
!      dof = elem%dof_plus
!      Tdof = elem%Tdof
!      ndof = dof * Tdof * ndim

!      !b(ivec+1:ivec+ndof) = 0 !elem%rhsST()
!    !  kvec = ivec
!      write(58, *), 'elem=', ie, 'tdof=',Tdof, 'ndim=',ndim,'dof=', elem%dof, 'dof_plus=' , elem%dof_plus
!      do l = 1, Tdof
!         do k = 1, ndim
!            write(58, '(a5, i5, a5, i5, 100es12.4)') 'Tdof=',l,  'ndim=', k,  elem%rhsST(k,1:dof, l)
!          !  kvec = kvec + dof
!         enddo !k
!      enddo !l
!      write(58,*), '--------------------------------'
!     ! ivec = ivec + ndof
!   end do

  write(58,*), 'Newton b--------------------------------'
   ivec = 0

   do ie=1,grid%nelem
      elem => grid%elem(ie)
      dof = elem%dof
      ndof = dof * elem%Tdof * ndim


      kvec = ivec

      do l = 1, elem%Tdof
         do k = 1, ndim
             write(58, '(4i5, 200es12.4)') l , k ,kvec+1, kvec+dof, state%nlSolver%b(kvec+1:kvec + dof)
            kvec = kvec + dof
         enddo !k
      enddo !l
      write(58,*), '--------------------------------'
      ivec = ivec + ndof
   end do

   write(58,*), 'wSTfin----------------------------------'
   do ie= 1, grid%nelem
      elem => grid%elem(ie)

      do k = 1, ndim
         write(58, *) k , elem%wSTfin(k, 1:elem%dof)
      enddo
      write(58,*), '--------------------------------'
   enddo
!   write(58,*) 'wStfin_____________________'
!   do ie= 1, grid%nelem
!      elem => grid%elem(ie)
!
!      do k = 1, ndim
!         write(58, *) k , elem%wSTfin(k, 1:elem%Qdof)
!      enddo
!      write(58,*), '--------------------------------'
!   enddo

  close(58)
  end subroutine Write_rhsST



  subroutine WriteMatrixA_ST(eta)
    real, intent(in) :: eta
    class(element), pointer:: elem,elem1 ! one element
    real, dimension(:),allocatable:: accum
    integer :: i,j,k,l,m,n,p,mm,nn,kk,jj,pp,r
    integer :: is, is1, face, dof, ndof, ndof1,Tdof

    allocate(accum(1:state%nsize))

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

       print*,'--------------  matrix 1/tau*M + C  -----------------', state%nsize
       do i=1,grid%nelem
          elem => grid%elem(i)

          is = elem%ncv

          dof = elem%dof
          Tdof = elem%Tdof
          ndof = dof * ndim
      !	print*, 'Velikost block0:', size(elem%block(0)%Mb(1,:))
      !	print*, 'Velikost blockST:', size(elem%blockST(0)%Mb(1,:))
         do m = 1, Tdof
            mm = (m-1)*ndof
            do j = 1, ndim
               do k = 1, dof
                  accum(:) = 0
                  if (eta > 0) then
                  do n = 1,Tdof
                     nn = (n-1) * ndof
                     do p = 1, ndim
                        pp = (p-1)*ndim
                        accum(is + nn + pp : is + nn + pp + dof - 1) = eta *  & !eta *
                             time%refTimeMatrix%Mb(m,n) ! * elem%Mass%Mb(k,1:dof)
                     enddo !p
                  enddo !n
                  endif

                  accum(is : is + Tdof * ndof) =  accum(is : is + Tdof * ndof) &
                      + elem%blockST(0)%Mb(mm + (j-1)*ndim + k, 1:Tdof*ndof )
                  ! write(*,'(i5,a2,100es11.4)')is+mm+(j-1)*ndim + k - 1,': ',accum(is : is + Tdof * ndof - 1)

                  do r=1,elem%flen
                     face = elem%face(neigh,r)
                     if(face > 0) then
                        elem1 => grid%elem(face)
                        ndof1 = elem1%dof * ndim * elem1%Tdof
                        is1 = elem1%ncv
                        accum(is1: is1 + ndof1 -1) = elem%blockST(r)%Mb(mm + (j-1)*ndim + k, 1:ndof1 )
                     endif
                  enddo !r

                  !write(*,'(i5,a2,100es12.4)')is+mm+(j-1)*ndim + k - 1,': ',accum(:)

               enddo !k
            enddo !j
         enddo !m

      enddo !i
      class default
         stop 'Reference time matrix is allocated only for STDG'
      end select
      end associate

    deallocate(accum)
  end subroutine WriteMatrixA_ST

  !writes Matrix eta*MassST + blockST into file ../stdgm/Matrix
   subroutine WriteMatrixST_Blocks(eta)
    real, intent(in) :: eta
    class(element), pointer:: elem,elem1 ! one element
    real, dimension(:),allocatable:: accum
    integer :: i,j,k,l,m,n,p,mm,nn,kk,jj,pp,r
    integer :: is, is1, face, dof, ndof, ndof1,Tdof

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

       !open (59, file="../Tests/Matrix", action ="write", status="replace")
       open (59, file="MatrixST-ADGo", action ="write", status="replace")
      !	if ( ierror /= 0 ) then
      !		print*, "Failed to open test.dat!"
      !		stop
      !	end if


       write(59,*) '--------------  matrix 1/tau*M + C  -----------------', state%nsize
       !only 1-st elem now

       do i=1, state%time%max_Tdof
          !write(59,'(i5,a2,100f8.2)') i,': ', state%time%refTimeMatrix%Mb(i,1:state%time%max_Tdof)
          write(59,'(i5,a2,100es12.4)') i,': ', time%refTimeMatrix%Mb(i,1:time%max_Tdof)
       enddo
       write(59,*) , 'eta=' , eta
       do i=1, grid%nelem
       !do i= 1721, 1726
         write(59,*) '--------------------'
         write(59,*) 'element(', i, ')'
          elem => grid%elem(i)

          is = elem%ncv

          dof = elem%dof
          Tdof = elem%Tdof
          ndof = dof * ndim
!          write(59,*) 'Mass:'
!          do m =1,dof
!             write(59,'(100es12.4)') elem%Mass%Mb(m,:)
!          enddo !m

          allocate(accum(1:ndof*Tdof))
          !	print*, 'Velikost block0:', size(elem%block(0)%Mb(1,:))
          !	print*, 'Velikost blockST:', size(elem%blockST(0)%Mb(1,:))
          write(59,*) 'massST:'
          do m = 1, Tdof
             mm = (m-1)*ndof
             do j = 1, ndim
                do k = 1, dof
                   accum(:) = 0
!                   if (eta > 0) then
                      do n = 1,Tdof
                         nn = (n-1) * ndof
                        ! do p = 1, ndim
                            ! <> 0 only when the same component meet
                            p = j
                            pp = (p-1)*dof
                            accum(nn + pp + 1 : nn + pp + dof) = eta   & !eta *
                                 * elem%Mass%Mb(k,1:dof) * time%refTimeMatrix%Mb(m,n)
                        ! enddo !p
                      enddo !n
!                   else
!
!                   print*, 'WriteMatrixST_Blocks not implemented for eta = 0'
!                   stop
!                   endif

                   !		accum(1: Tdof * ndof) =  accum(1 : Tdof * ndof) &
                   !				 + elem%blockST(0)%Mb(mm + (j-1)*ndim + k, 1:Tdof*ndof )

                   write(59,'(i5,a2,100f15.8)') mm + (j-1)*dof + k ,': ',accum(:)
                   !write(59,'(i5,a2,200es12.4)') mm + (j-1)*dof + k ,': ',accum(:)


                   !write(*,'(i5,a2,100es8.0)')is+mm+(j-1)*ndim + k - 1,': ',accum(:)

                enddo !k
             enddo !j
          enddo !m

          deallocate(accum)

          write(59,*) 'block(0):', size(elem%blockST(0)%Mb(:,1)), 'x' , size(elem%blockST(0)%Mb(1,:))
          do m = 1, Tdof*dof*ndim
             write(59,'(i5,a2,100f15.8)') m,': ', elem%blockST(0)%Mb(m,:)
             !write(59,'(i5,a2,200es12.4)') m,': ', elem%blockST(0)%Mb(m,:)
          enddo !m

   !offdiagonal block commented
         do r=1,elem%flen
             face = elem%face(neigh,r)
             if(face > 0) then
                write(59,*), 'block(',r,'):'
                elem1 => grid%elem(face)
                ndof1 = elem1%dof * ndim * elem1%Tdof
                is1 = elem1%ncv
                allocate(accum(1:ndof1))
                do m = 1, ndof*Tdof
                   accum(1: ndof1) = elem%blockST(r)%Mb( m, 1:ndof1 )
                   write(59,'(i5,a2,100f15.8)'), m - 1,': ',accum(:)
                   !                   			write(59,'(i5,a2,100es10.2)'), m - 1,': ',accum(:)
                enddo !m
                deallocate(accum)
             endif
          enddo !r
          
       enddo !i


      close(59)

   class default
      stop  'WriteMatrixST_Blocks for STDGM only'
   end select
   end associate

  end subroutine WriteMatrixST_Blocks

  subroutine WriteMatrixST()
    real, dimension(:),allocatable:: accum
    integer :: Tdof,i,j

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

      Tdof = state%time%max_Tdof

      !allocate(accum(1:state%nsize))

      !print*, 'nelem=', grid%nelem

      print*,'--------------  Time part of Mass matrix -----------------', Tdof


         print*, 'Tdof = ', Tdof
         do i = 1,Tdof
            do j = 1,Tdof
               write(*,'(i5,a2,i5,a2,100es11.4)') i,',',j,': ', time%refTimeMatrix%Mb(i,j)
      !         write(*,'(a2, es5.5)') '  ',
      !
            enddo !j
         enddo !i =1,Tdof

    class default
      stop  'WriteMatrixST_Blocks for STDGM only'
    end select
    end associate


  end subroutine WriteMatrixST


    subroutine WriteMatrixAMatlab(eta)
    real, intent(in) :: eta
    class(element), pointer:: elem,elem1 ! one element
    real, dimension(:),allocatable:: accum
    integer :: i,k,in,j,is, is1, dof, ndof, ndof1
    character(len=7) :: MatrixA
    integer :: l

    allocate(accum(1:state%nsize))

    MatrixA = 'MatrixA'
    open(21, file=MatrixA, status='UNKNOWN', position = 'append')

    print*,'--------------  matrix 1/tau*M + C  -------------', state%nsize,',  eta =',eta
    do i=1,grid%nelem
       elem => grid%elem(i)
       is = elem%ncv
       dof = elem%dof
       ndof = dof*ndim

       do j=1,ndof
          accum(:) = 0.

          if(eta /= 0.) then
             print*,'^^^',j,dof, is + (j-1)/dof *dof, is + (j-1)/dof *dof + dof-1
             accum(is + (j-1)/dof *dof: is + (j-1)/dof *dof + dof-1) &
                  = eta * elem%Mass%Mb(j,1:dof)
          endif

          accum(is:is+ndof-1) =  accum(is:is+ndof-1) + elem%block(0)%Mb(j,1:ndof)


          do k=1,elem%flen
             in = elem%face(neigh,k)
             if(in >0) then
                elem1 => grid%elem(in)
                ndof1 = elem1%dof * ndim
                is1 = elem1%ncv
                accum(is1:is1+ndof1-1) = elem%block(k)%Mb(j,1:ndof1)
             endif
          enddo

          !write(*,'(i5,a2,100es11.3)')is+j-1,': ',accum(:)
          do l=1,state%nsize
            if ( accum(l)/= 0.  ) then
               write(21,'(i5,i5,5000es14.6)') is+j-1, l, accum(l)
            endif
          enddo !l

       enddo
    enddo
    print*,'-----------end of   matrix 1/tau*M + C  -----------------'

    close(21)

    deallocate(accum)
  end subroutine WriteMatrixAMatlab

  subroutine WriteBiCG_Matlab_All(nsize, inum, b, c, x, y)
    integer, intent (in):: nsize ! size of algebraic problems
    integer, intent (in):: inum  ! index of level
    real, dimension(1:nsize), intent(in) :: b,c, x, y
    integer :: im, ib, ic, ix, iy, is, num_size, text_size
    character(len=5) :: ch5
    character(len=50) :: ch_m, ch_b, ch_c, ch_x, ch_y

    ch_m = 'MAT_mat_A-00000'
    ch_b = 'MAT_rhs_b-00000'
    ch_c = 'MAT_rhs_c-00000'
    ch_x = 'MAT_sol_x-00000'
    ch_y = 'MAT_sol_y-00000'

    num_size = 5
    text_size = 10

    if(inum == 0 ) then
       is = 0
    else
       is = int(log(1.*inum)/log(10.)) ! SOMETIMES makes troubles due to rounding
       print*,log(1.*inum +1E-3)/log(10.), is
    endif
    write( ch5, '(i5)' ) inum  ! change the format if num_size /= 5 !!!
    ch_m(num_size+text_size-is:num_size+text_size) = ch5(num_size-is: num_size)
    ch_b(num_size+text_size-is:num_size+text_size) = ch5(num_size-is: num_size)
    ch_c(num_size+text_size-is:num_size+text_size) = ch5(num_size-is: num_size)
    ch_x(num_size+text_size-is:num_size+text_size) = ch5(num_size-is: num_size)
    ch_y(num_size+text_size-is:num_size+text_size) = ch5(num_size-is: num_size)

    im = 10
    ib = 11
    ic = 12
    ix = 13
    iy = 14


    open(im, file = ch_m, status='UNKNOWN')
    open(ib, file = ch_b, status='UNKNOWN')
    open(ic, file = ch_c, status='UNKNOWN')
    open(ix, file = ch_x, status='UNKNOWN')
    open(iy, file = ch_y, status='UNKNOWN')

    call WriteBIGmatrixMatlab(im, nsize)
    call WriteBIGVectorMatlab(nsize, b, c, ib, ic)
    call WriteBIGVectorMatlab(nsize, x, y, ix, iy)

    close(im)
    close(ib)
    close(ic)
    close(ix)
    close(iy)

  end subroutine WriteBiCG_Matlab_All

  !> BIG block matrix - output to MATLAB
  subroutine WriteBIGmatrixMatlab(ifile, nsize)
    use matrix_oper_int
    integer, intent (in):: ifile
    integer, intent (in):: nsize
    class(element), pointer :: elem , elem1 ! one element
    integer :: i, j, k, l, l1, m, n, mm, nn, m1, n1, iee, irow
    integer :: dof, dof1, ndof, ndof1, Tdof, Tdof1, iss, is1
    real, dimension(:),allocatable :: accum
    integer :: p_mod, q_mod,bigNSize, max_dof, max_Tdof, dofAll

   ! write(ifile,*) '# size = ',nsize, nsize

    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize( p_mod , q_mod )) &
         stop 'wrong size of vectors in bMVprodBIG!'


    bigNSize = state%bigNSize(p_mod, q_mod)

    max_dof = DOFtriang( DegFromDofTriang(state%space%max_dof) + p_mod )
    max_Tdof = state%time%max_Tdof + q_mod

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

       ! allocation of a matrix row
       allocate( accum( 1:nsize) )

       do i=1,grid%nelem
          elem => grid%elem(i)

          dof = DOFtriang( elem%deg + p_mod )
          Tdof = elem%Tdof + q_mod
          ndof = dof * ndim
          dofAll = ndof * Tdof

          iss = elem%bigNcv(p_mod, q_mod)

          do m = 1,Tdof

             do n = 1, ndim

                do l = 1, dof ! ???
                   irow = iss + (m -1)*ndof + (n - 1)*dof + (l-1)


                   accum(:) = 0.

                   ! time derivative terms (Mass matrix)
                   if (eta /= 0.) then
                      if ( q_mod > 0 ) stop "WriteBIGmatrixMatlab not implemented for q_mod > 0"

                      do m1 = 1,Tdof

                         do n1 = 1,ndim
                            iee = iss + (m1 -1)*ndof + (n1 - 1)*dof -1

                            accum(iee+1 : iee+dof)  = accum(iee+1 : iee+dof) + &
                                 eta * time%refTimeMatrix%Mb(m,m1)*elem%Mass%Mb(l, 1:dof)

                         enddo
                      enddo
                   endif


                   ! diagonal block
                   do m1 = 1,Tdof

                      do n1 = 1,ndim
                         iee = iss + (m1 -1)*ndof + (n1 - 1)*dof -1

                         accum(iee+1 : iee+dof)  = accum(iee+1 : iee+dof) + &
                              elem%bigBlock(0)%Mb(l, 1:dof, n, n1, m, m1)
                      enddo
                   enddo


                   !! off-diagonal blocks
                   do j=1,elem%flen
                      k = grid%elem(i)%face(neigh,j)

                      if(k > 0) then
                         elem1 => grid%elem(k)
                         dof1 = DOFtriang( elem1%deg + p_mod )
                         Tdof1 = elem1%Tdof + q_mod
                         ndof1 = dof1 * ndim

                         is1 = elem1%bigNcv(p_mod, q_mod)

                         do m1 = 1,Tdof1

                            do n1 = 1,ndim
                               iee = is1 + (m1 -1)*ndof + (n1 - 1)*dof1 -1

                               accum(iee+1 : iee+dof1)  = accum(iee+1 : iee+dof1) + &
                                    elem%bigBlock(j)%Mb(l, 1:dof1, n, n1, m, m1)
                            enddo
                         enddo
                      endif
                   enddo


                   ! writing
                   !write(*,'(i5,a2,100es11.3)')is+j-1,': ',accum(:)
                   do l1=1, nsize
                      if ( accum(l1)/= 0.  ) then
                         write(ifile,*) irow, l1, accum(l1)
                      endif
                   enddo !l

                end do ! do l=1,dof
             end do ! do n=1,ndim

          end do ! do m=1, Tdof


       enddo !i =1,grid%nelem

       deallocate(accum)
       class default
       stop 'WriteBIGmatrixMatlab only for STDG method'

    end select
  end associate

  !stop "id3kd3okdo3"

end subroutine WriteBIGmatrixMatlab


  !> BIG block matrix - output to MATLAB
  subroutine WriteBIGmatrixMatlab_Naive(nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    integer :: i, j
    real, dimension(:),allocatable :: x, b, c
    character(len=7) :: MatrixA, MatrixB

    MatrixA = 'MatrixB'
    open(21, file=MatrixA, status='UNKNOWN')
    write(21,*) '# size = ',nsize, nsize

    MatrixB = 'MatILUB'
    open(22, file=MatrixB, status='UNKNOWN')
    write(22,*) '# size = ',nsize, nsize

    ! allocation of a matrix row
    allocate( x( 1:nsize), b( 1:nsize), c(1:nsize) )

    do i = 1, nsize
       do j=1,nsize
          x(:) = 0.
          x(j) = 1.

          call bMVprodBIG(b, x, nsize)

          call bMViLUprodBIG(c, x, nsize)

          !write(*,'(i5,a2,100es11.3)')is+j-1,': ',accum(:)
          !do l1=1, nsize
          if ( b(i) /= 0.  ) write(21,*) i, j, b(i)
          if ( c(i) /= 0.  ) write(22,*) i, j, c(i)

          !enddo !l
       enddo
    enddo
    close(21)
    close(22)
    print*,'File "',MatrixA,'" written'
  end subroutine WriteBIGmatrixMatlab_Naive

  !> BIG block matrix - output to MATLAB
  subroutine WriteBIGmatrixMatlab_Simple(nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    integer :: i, j
    real, dimension(:),allocatable :: x, b, c
    character(len=7) :: MatrixA, MatrixB

    MatrixA = 'MatrixA'
    open(21, file=MatrixA, status='UNKNOWN')
    write(21,*) '# size = ',nsize, nsize

    MatrixB = 'Mat_ILU'
    open(22, file=MatrixB, status='UNKNOWN')
    write(22,*) '# size = ',nsize, nsize

    ! allocation of a matrix row
    allocate( x( 1:nsize), b( 1:nsize), C( 1:nsize) )

    do i = 1, nsize
       x(:) = 0.
       x(i) = 1.

       call bMVprodBIG_Dual(b, x, nsize)

       !call bMViLUprodBIG_Dual(c, x, nsize)

       !write(*,'(i5,a2,100es11.3)')is+j-1,': ',accum(:)
       !do l1=1, nsize
       do j=1,nsize
          if ( b(j) /= 0.  ) write(21, * ) i, j, b(j)
       !   if ( c(j) /= 0.  ) write(22, * ) i, j, c(j)
       enddo
    enddo
    deallocate(x, b, c)
    close(21)
    close(22)

  end subroutine WriteBIGmatrixMatlab_Simple

  subroutine WriteBIGVectorMatlab(nsize, x, y,  ifile1, ifile2)
    use matrix_oper_int
    integer, intent (in):: nsize, ifile1, ifile2
    real, dimension(1:nsize), intent (in):: x, y
    integer :: i, j


    do i=1, nsize
       write(ifile1, *) i, x(i)
       write(ifile2, *) i, y(i)
    enddo
  end subroutine WriteBIGVectorMatlab


  subroutine WriteMatrixLU()
    class(element), pointer:: elem,elem1 ! one element
    real, dimension(:),allocatable:: accum
    integer :: i,k,in,j,is, is1, dof, ndof, ndof1,Tdof,Tdof1

    allocate(accum(1:state%nsize))

    print*,'--------------  matrix ILU  -----------------'
    do i=1,grid%nelem
       elem => grid%elem(i)
       is = elem%ncv
       dof = elem%dof

       if (state%time%disc_time == 'STDG') then
       	Tdof = elem%Tdof
       else
       	Tdof = 1
       endif

       ndof = dof*ndim * Tdof

       do j=1,ndof
          accum(:) = 0.

          accum(is:is+ndof-1) =  accum(is:is+ndof-1) + elem%ILU(0)%Mb(j,1:ndof)


          do k=1,elem%flen
             in = elem%face(neigh,k)
             if(in >0) then
                elem1 => grid%elem(in)

                if (state%time%disc_time == 'STDG') then
                	Tdof1 = elem1%Tdof
                else
                	Tdof1 = 1
                endif

                ndof1 = elem1%dof * ndim * Tdof1
                is1 = elem1%ncv
                accum(is1:is1+ndof1-1) = elem%ILU(k)%Mb(j,1:ndof1)
             endif
          enddo

          write(*,'(i5,a2,100es12.4)')is+j-1,': ',accum(:)
       enddo
    enddo
    print*,'-----------end of   matrix ILU  -----------------'
    deallocate(accum)

  end subroutine WriteMatrixLU



  !> evaluation of the inverse (\f$ M^{-1} \f$ ) to Mblock Matrix \f$ M \f$,
  !> not further used
  subroutine  MblockLU(M, M1)
    type(Mblock), intent(in) :: M
    type(Mblock), intent(inout) :: M1
    real, dimension(:, :), allocatable :: L, U
    real, dimension(:), allocatable :: f, b, a
    integer :: dof, j, j1, k, k1

    if(size(M%Mb,1) /= size(M%Mb,2) .or. any(shape(M%Mb) /= shape(M1%Mb))) then
       print*,'Bad dimension in MblockLU in matrix.f90'
       print*,':',shape(M%Mb),shape(M%Mb)
       stop
    endif
    dof = size(M%Mb,1)

    ! TODO optimize

    allocate(L(1:dof, 1:dof), U(1:dof, 1:dof))
    allocate(a(1:dof), f(1:dof), b(1:dof))

    ! unit matrix
    L(1:dof, 1:dof) = 0.
    U(1:dof, 1:dof) = 0.
    do j=1, dof
       L(j,j) = 1.
    enddo

    ! let us compute the inverse elem%Mass by LU decomposition
    do k=1,dof
       do j=k,dof
          U(k,j) = M%Mb(k,j) - sum(L(k,1:k-1)*U(1:k-1,j) )
       enddo
       if(k /= dof) then
          do k1=k+1,dof
             L(k1,k) = (M%Mb(k1,k) - sum(L(k1,1:k-1)*U(1:k-1,k) )) /U(k,k)
          enddo
       endif
    enddo


    ! backward solution: LU a = e_k,  e_k is a canonical basis
    do k=1,dof
       f(1:dof) = 0.
       f(k) = 1.

       b(1) = f(1)/L(1,1)
       do j=2, dof
          b(j) = (f(j) - sum(b(1:j-1)*L(j,1:j-1) ) ) / L(j,j)
       enddo

       a(dof) = b(dof)/U(dof,dof)

       do j1 = 1, dof-1
          j = dof -j1
          a(j) = (b(j) - sum(a(j+1:dof) * U(j,j+1:dof) ) )/U(j,j)
       enddo

       M1%Mb(1:dof, k) = a(1:dof)
    enddo

    deallocate(L, U, f, b, a)

  end subroutine MblockLU

  !> block matrix - vector product: \f$ b = M^{-1}x \f$,  \f$ M^{-1}  \f$ is the
  !> inverse mass matrix in block form
  subroutine bMassInvVprod(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem
    integer :: i, k,dof, ielem, is, ie

    do i=1,grid%nelem
       elem => grid%elem(i)
       dof = elem%dof

       ielem = elem%ncv
       do k=1,ndim
          is = ielem + (k-1)*dof
          ie = is + dof -1
          b(is:ie) =  matmul(elem%MassInv%Mb(1:dof,1:dof), x(is:ie) )
       enddo
    enddo

  end subroutine bMassInvVprod

  !> block matrix - vector product: \f$ b = (\eta M+ C_k) x \f$,
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).block(*),
  !> \f$ \eta = 1/\tau_k\f$ is external
  subroutine bMVprod(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem,elem1 ! one element
    integer :: i,j,k, ndof, ndof1, is, is1 !, l_accum
    real, dimension(:),allocatable :: accum
    !real, dimension(:),allocatable, save:: accum

    ! allocate accum once to accomodate for the largest dof.
    allocate(accum(maxval(grid%elem%dof) * ndim ) )
    !l_accum= maxval(grid%elem%dof) * ndim
    !if(size(accum) <= l_accum) then
    !   deallocate(accum)
    !   allocate(accum(1:l_accum))
    !endif

    !print*,'####!!!! eta=',eta
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof1 = elem%dof
       ndof = elem%dof * ndim
       is = elem%ncv

       if (eta /= 0.) then
         do k = 0,ndof-1,ndof1
           accum(k+1:k+ndof1) = eta * matmul(elem%Mass%Mb(1:ndof1, 1:ndof1), &
                x(is+k: is+k+ndof1-1))
         enddo
         ! diagonal block
         accum(1:ndof) = accum(1:ndof) &
              + matmul(elem%block(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )
       else
         ! diagonal block
         accum(1:ndof) = matmul(elem%block(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )
       endif


       !! off-diagonal blocks
       do j=1,elem%flen
          k = grid%elem(i)%face(neigh,j)

          if(k > 0) then
             elem1 => grid%elem(k)
             ndof1 = elem1%dof * ndim
             is1 = elem1%ncv

             accum(1:ndof) = accum(1:ndof) &
                  + matmul(elem%block(j)%Mb(1:ndof, 1:ndof1), x(is1: is1+ndof1-1) )
          endif
       enddo
       b(is: is+ndof-1) = accum(1:ndof)

       ! if(i==1) then
       !    do k=1,ndof1
       !       write(*,'(a6,i5,300es12.4)') ' Mass:',k, elem%Mass%Mb(k, 1:ndof1)
       !    enddo
       ! write(*,'(a6,300es12.4)') ' x:',x(1:8)
       ! write(*,'(a6,300es12.4)') 'b=Ax:',b(1:8)

       ! endif


    enddo

    deallocate(accum)

  end subroutine bMVprod


  !> block matrix - vector product: \f$ b = (\eta M+ C_k) x \f$,
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass multiplied by state%time%refTimeMatrix,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).blockST(*),
  !> \f$ \eta = 1/\tau_k\f$ is external
  subroutine bMVprodST(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem,elem1 ! one element
    integer :: i, j, k, l, m, n, mm, nn
    integer :: dof, dof1, ndof, ndof1, Tdof, is, is1
    real, dimension(:),allocatable :: accum

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )


       ! allocate accum once to accomodate for the largest dof.

       !allocate(accum(maxval(grid%elem%dof) * ndim ) )
       allocate( accum( state%space%max_dof * state%time%max_Tdof * ndim) )


       !print*,'####bMVprodST!!!! eta=',eta
       do i=1,grid%nelem
       !   print*, "b in the beginning = ", norm2(b)
       ! 	print*, 'element', i
          accum(:) = 0.0
          elem => grid%elem(i)
          ndof1 = elem%dof
          Tdof = elem%Tdof
          ndof = elem%dof * ndim
          dof = ndof * Tdof

          is = elem%ncv

          if (eta /= 0.) then
             do m =1,Tdof
                mm = (m-1) * ndof
                do n = 1,Tdof
                   nn = (n-1) * ndof
                   do k = 0,ndof-1,ndof1
                      !write(*,'(2(a7,i5),a6,i5,a1,i5,3(a6,i5))') &
                      !     'elem',i,'ncv:', is, 'accum',mm + k + 1,':', mm+ k + ndof1, 'k =', k , 'm=', m, 'n=', n
                      !	print*,   (is+nn+k )
                      accum(mm + k + 1 : mm+ k + ndof1) = accum(mm + k + 1 : mm+ k + ndof1) + &
                           eta * time%refTimeMatrix%Mb(m,n) * &
                           matmul(elem%Mass%Mb(1:ndof1, 1:ndof1) , x(is+nn+k : is+nn+k+ndof1-1))

                   enddo !k

                enddo !n
             enddo !m

             !print*, "after mass = ", norm2(b)

             ! diagonal block
             !print*, "norm x = " , x(is: is+dof-1)
             !print*, "accum mass = ", norm2(accum(1:dof))
             accum(1:dof) = accum(1:dof) &
                  + matmul(elem%blockST(0)%Mb(1:dof, 1:dof), x(is: is+dof-1) )
             !print*, "accum 1 = ", dof,  norm2(accum)


             !stop

          else
             ! diagonal block
             accum(1:dof) = matmul(elem%blockST(0)%Mb(1:dof, 1:dof), x(is: is+dof-1) )
          endif

          !! off-diagonal blocks
          do j=1,elem%flen
             k = grid%elem(i)%face(neigh,j)

             if(k > 0) then
                elem1 => grid%elem(k)
                dof1 = elem1%dof * ndim * elem1%Tdof
                is1 = elem1%ncv

                accum(1:dof) = accum(1:dof) &
                     + matmul(elem%blockST(j)%Mb(1:dof, 1:dof1), x(is1: is1+dof1-1) )

               !print*, "accum 1 = ",i,j, norm2(accum)
             endif
          enddo !j

          b(is: is+dof-1) = accum(1:dof)

          !if(dot_product(accum(1:dof), accum(1:dof)) > 1E-15 .and. state%time%recompute_back >= 2) &
          !     write(*,'(a8,2i5, 300es12.4)') 'Ax:',i, dof, accum(1:dof)
          !print*, "bMV_ST : elem ", i, "norm b = ", norm2(accum(1:dof))
       enddo !i


       deallocate(accum)
    class default
      stop 'bMVprodST only for STDG method'

    end select
    end associate

    !print*,'#############E#E##E#E#', state%linSolver%iter
    !if(state%time%recompute_back == 2 .and. state%linSolver%iter >= 1 )stop "3d3ed388"

  end subroutine bMVprodST


  !> transposed block matrix - vector product: \f$ b = (\eta M+ C_k)^T x \f$,
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass multiplied by state%time%refTimeMatrix,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).blockST(*),
  !> \f$ \eta = 1/\tau_k\f$ is external
  subroutine bMVprodST_Dual(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
!    real, intent(in) :: eta ! 1 / \tau,  non/stationary problem
    class(element), pointer:: elem,elem1 ! one element
    integer :: i, j, k, l, m, n, mm, nn
    integer :: dof, dof1, ndof, ndof1, Tdof, is, is1, big_dof
    integer :: loc_neigh
    real, dimension(:),allocatable :: accum

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

!       print*, 'ETA (bMVprodST_Dual)= ', eta

!      print*, 'nsize sa: ' , nsize, size(x(:))


       ! allocate accum once to accomodate for the largest dof.

       !allocate(accum(maxval(grid%elem%dof) * ndim ) )
       allocate( accum( state%space%max_dof * state%time%max_Tdof * ndim) )

       big_dof = 0
!       print*,'####!!!! eta=',eta
       !print*, 'blockST0:' , grid%elem(1)%blockST(0)%Mb
        ! stationary
       if (eta == 0.0) then
          do i=1,grid%nelem
            accum(:) = 0.0
            elem => grid%elem(i)
            ndof1 = elem%dof
            Tdof = elem%Tdof
            ndof = elem%dof * ndim
            dof = ndof * Tdof
            big_dof = big_dof + dof



            is = elem%ncv
            ! diagonal block - TRANSPOSED
            accum(1:dof) = matmul(  x(is: is+dof-1), elem%blockST(0)%Mb(1:dof, 1:dof) )

           !! off-diagonal blocks - we have to find them in elem(?)%blockST(??)
            do j=1,elem%flen
               k = elem%face(neigh,j)

               if(k > 0) then
                  elem1 => grid%elem(k) ! for Dual version purposes
                  dof1 = elem1%dof * ndim * elem1%Tdof ! same for dual and primal
                  is1 = elem1%ncv ! same for dual and primal
                  loc_neigh = elem%face(nei_i, j) ! local index of elem as an neighbor of elem1

                  accum(1:dof) = accum(1:dof) &
                       ! + matmul(elem%blockST(j)%Mb(1:dof, 1:dof1), x(is1: is1+dof1-1) ) - primal version
                      + matmul( x(is1: is1+dof1-1) , elem1%blockST(loc_neigh)%Mb(1:dof1, 1:dof) )! dual version
               endif
            enddo !j

            b(is: is+dof-1) = accum(1:dof)
         enddo !i

         if (big_dof /= nsize) then
            print*, 'wrong dimension in bMVprodST_dual' , big_dof, nsize
            stop
         endif
       else
         do i=1,grid%nelem
            accum(:) = 0.0
            elem => grid%elem(i)
            ndof1 = elem%dof
            Tdof = elem%Tdof
            ndof = elem%dof * ndim
            dof = ndof * Tdof

            is = elem%ncv
  !             do m =1,Tdof
  !                mm = (m-1) * ndof
  !                do n = 1,Tdof
  !                   nn = (n-1) * ndof
  !                   do k = 0,ndof-1,ndof1
  !                      !write(*,'(2(a7,i5),a6,i5,a1,i5,3(a6,i5))') &
  !                      !     'elem',i,'ncv:', is, 'accum',mm + k + 1,':', mm+ k + ndof1, 'k =', k , 'm=', m, 'n=', n
  !                      !	print*,   (is+nn+k )
  !                      accum(mm + k + 1 : mm+ k + ndof1) = accum(mm + k + 1 : mm+ k + ndof1) + &
  !                           eta * time%refTimeMatrix%Mb(m,n) * &
  !                           matmul(elem%Mass%Mb(1:ndof1, 1:ndof1) , x(is+nn+k : is+nn+k+ndof1-1))
  !
  !                   enddo !k
  !
  !                enddo !n
  !             enddo !m
  !
  !             ! diagonal block
  !             accum(1:dof) = accum(1:dof) &
  !                  + matmul(elem%blockST(0)%Mb(1:dof, 1:dof), x(is: is+dof-1) )
  !
           stop 'eta /= 0 not implemented in bMVprodST_Dual'

           !! off-diagonal blocks
           !F@R control offdiag blocks - structure
            do j=1,elem%flen
               k = grid%elem(i)%face(neigh,j)

               if(k > 0) then
                  elem1 => grid%elem(k)
                  dof1 = elem1%dof * ndim * elem1%Tdof ! same for dual and primal
                  is1 = elem1%ncv ! same for dual and primal
                  loc_neigh = elem%face(nei_i, j) ! local index of elem as an neighbor of elem1
                  accum(1:dof) = accum(1:dof) &
                       ! + matmul(elem%blockST(j)%Mb(1:dof, 1:dof1), x(is1: is1+dof1-1) ) - primal version
                      + matmul( x(is1: is1+dof1-1) , elem1%blockST(loc_neigh)%Mb(1:dof1, 1:dof) )! dual version
               endif
            enddo !j

            b(is: is+dof-1) = accum(1:dof)
         enddo !i

       endif ! eta

       deallocate(accum)
    class default
      stop 'bMVprodST_Dual only for STDG method'

    end select
    end associate

  end subroutine bMVprodST_Dual


  !> block matrix - vector product: \f$ b = (\eta M+ C_k) x \f$,
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass multiplied by state%time%refTimeMatrix,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).blockST(*),
  !> \f$ \eta = 1/\tau_k\f$ is external
  !> uses the 6-dimensional matrix blocks elem%bigBlock(:)
  subroutine bMVprodBIG(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer :: elem , elem1 ! one element
    integer :: i, j, k,   m, n, mm, nn
    integer :: dof, dof1, ndof, ndof1, Tdof, iss, is1, Tdof1, dofAll1
    real, dimension(:),allocatable :: accum, accum1
    integer :: p_mod, q_mod,bigNSize, max_dof, max_Tdof, dofAll
    integer, dimension(1:6) :: dimensions
    type(Elemental3_t) :: y, z

    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize( p_mod , q_mod )) &
      stop 'wrong size of vectors in bMVprodBIG!'

    !print*,'eta= ',eta, p_mod,q_mod

    bigNSize = state%bigNSize(p_mod, q_mod)

    max_dof = DOFtriang( DegFromDofTriang(state%space%max_dof) + p_mod )
    max_Tdof = state%time%max_Tdof + q_mod

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

       ! allocate accum once to accomodate for the largest dof.
       allocate( accum( max_dof*max_Tdof*ndim) )
       allocate( accum1( max_dof*max_Tdof*ndim) )

       do i=1,grid%nelem

        !print*, "b in the beginning = ", norm2(b)
        accum(:) = 0.0
        elem => grid%elem(i)

        dof = DOFtriang( elem%deg + p_mod )
        Tdof = elem%Tdof + q_mod
        ndof = dof * ndim
        dofAll = ndof * Tdof

        iss = elem%bigNcv(p_mod, q_mod)

        if (eta /= 0.) then
           if ( q_mod > 0 ) &
                stop "bMVprodBIG not implemented for q_mod > 0"
           do m =1,Tdof
              mm = (m-1) * ndof
              do n = 1,Tdof
                 nn = (n-1) * ndof
                 do k = 0,ndof-1,dof ! ???
                    accum(mm + k + 1 : mm+ k + dof) = accum(mm + k + 1 : mm+ k + dof) + &
                         eta * time%refTimeMatrix%Mb(m,n) * &
                         matmul(elem%Mass%Mb(1:dof, 1:dof) , x(iss+nn+k : iss+nn+k+dof-1))
                 enddo !k

              enddo !n
           enddo !m

        endif

! TODO add if clause for eta
!          else


        ! diagonal block
        ! x -> y(1:dof, 1:ndim, 1:tdof)
        !          print*, 'iss atd = ', iss, dofAll, dof, Tdof
        !print*, "norm x = " , x(iss: iss+dofAll-1)
        !          print*, 'size of X = ' , size(x)
        call y%initFrom1DArray_mat_order( x(iss: iss+dofAll-1), dof, ndim, Tdof )
        !          print*, 'xxx = ' , x !x(iss: iss+dofAll-1)

        dimensions = (/ dof, dof, ndim, ndim, Tdof, Tdof  /)

        !          call z%init( dof, ndim, Tdof)
        !          print*, 'diagonal block init', dof, ndim, Tdof
        !          print*, 'size z%x = ', size(z%x)

        call vectorMulBigBlock( elem%bigBlock(0) , y, dimensions, z )
        ! for eta /= 0 there is already the part from the mass matrix
        !print*, "accum mass = ", norm2(accum)
        accum(1:dofAll) = accum(1:dofAll)  + z%copyTo1Darray()
        !print*, "accum 1 = ", norm2(accum)

        
!          if (i==1) then
!            !print*, 'yyy = ', y%x
!            !print*, 'norm of bigBlock multiplication = ', norm2( accum(1:dofAll) )
!
!            accum(1:dofAll) = matmul(elem%blockST(0)%Mb(1:dofAll, 1:dofAll), x(iss: iss+dofAll-1) )
!
!            print*, 'norm of blockST multiplication = ', norm2( accum(1:dofAll) )
!
!          end if

        call y%delete()
        call z%delete()

!          endif

        !print*, 'size x', size(x)

        !! off-diagonal blocks
        do j=1,elem%flen
           k = grid%elem(i)%face(neigh,j)

           if(k > 0) then
              elem1 => grid%elem(k)
              dof1 = DOFtriang( elem1%deg + p_mod )
              Tdof1 = elem1%Tdof + q_mod

              is1 = elem1%bigNcv(p_mod, q_mod)
              dofAll1 = dof1 * Tdof1 * ndim

              call y%initFrom1DArray_mat_order( x(is1: is1+dofAll1-1), dof1, ndim, Tdof1 )
              dimensions = (/ dof, dof1, ndim, ndim, Tdof, Tdof1 /)
              !              call z%init( dof, ndim, Tdof )
              !              print*, 'neigbour elements'
              call vectorMulBigBlock( elem%bigBlock(j) , y, dimensions, z )
              !              if (i==2) then
              !                print*, "1d zzz = ", z%copyTo1Darray()
              !                print*, "old ver = ", matmul(elem%blockST(j)%Mb(1:dofAll, 1:dofAll1), x(is1: is1+dofAll1-1) )
              !              endif

              accum(1:dofAll) = accum(1:dofAll) + z%copyTo1Darray()
              !print*, "accum ", i,j, norm2(accum)

              
              !print*, 'norm of bigBlock multiplication = ', j,  norm2( accum(1:dofAll) )
              !accum(1:dofAll) = matmul(elem%blockST(j)%Mb(1:dofAll, 1:dofAll1), x(is1: is1+dofAll1-1) )
              !print*, 'norm of blockST multiplication = ', j , norm2( accum(1:dofAll) )

              call y%delete()
              call z%delete()

             endif
          enddo !j

          b(iss: iss+dofAll-1) = accum(1:dofAll)
          !!!write(*,'(a8, 2i6, 300es12.4)') 'matrix:',iss, iss+dofAll-1, accum(1:dofAll)
          !print*, "elem ", i, "norm b = ", norm2(accum(1:dofAll))
       enddo !i

       deallocate(accum)
       deallocate(accum1)
    class default
      stop 'bMVprodST only for STDG method'

    end select
    end associate

  end subroutine bMVprodBIG


  !> block matrix - vector product: \f$ b = (\eta M+ C_k) x \f$,
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass multiplied by state%time%refTimeMatrix,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).blockST(*),
  !> \f$ \eta = 1/\tau_k\f$ is external
  !> uses the 6-dimensional matrix blocks elem%bigBlock(:)
  !> it depends on the values state%p_mod, state%q_mod
  subroutine bMVprodBIG_Dual(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer :: elem , elem1 ! one element
    integer :: i, j, k, l, m, n, mm, nn
    integer :: dof, dof1, ndof, ndof1, Tdof, iss, is1, Tdof1, dofAll1
    real, dimension(:),allocatable :: accum
    integer :: p_mod, q_mod,bigNSize, max_dof, max_Tdof, dofAll, loc_neigh
    integer, dimension(1:6) :: dimensions
    type(Elemental3_t) :: y, z

    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize( p_mod , q_mod )) then
      print*,  "pmod,qmod = ", p_mod , q_mod
      print*, "nSize = ", nsize
      stop 'wrong size of vectors in bMVprodBIG_Dual!'
    endif


    bigNSize = state%bigNSize(p_mod, q_mod)

    max_dof = DOFtriang( DegFromDofTriang(state%space%max_dof) + p_mod )
    max_Tdof = state%time%max_Tdof + q_mod

    b(1:nsize) = 0.0

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

      ! allocate accum once to accomodate for the largest dof.
      allocate( accum( max_dof*max_Tdof*ndim) )

      do i=1,grid%nelem
        accum(:) = 0.0
        elem => grid%elem(i)

        dof = DOFtriang( elem%deg + p_mod )
        Tdof = elem%Tdof + q_mod
        ndof = dof * ndim
        dofAll = ndof * Tdof

        iss = elem%bigNcv(p_mod, q_mod)

        if (eta /= 0.) then
           do m =1,Tdof
              mm = (m-1) * ndof
              do n = 1,Tdof
                 nn = (n-1) * ndof
                 do k = 0,ndof-1,dof ! ???
                    accum(mm + k + 1 : mm+ k + dof) = accum(mm + k + 1 : mm+ k + dof) + &
                         eta * time%refTimeMatrix%Mb(n,m) * &
                         matmul(elem%Mass%Mb(1:dof, 1:dof) , x(iss+nn+k : iss+nn+k+dof-1))
                 enddo !k
              enddo !n
           enddo !m

        endif


        ! diagonal block
        call y%initFrom1DArray_mat_order( x(iss: iss+dofAll-1), dof, ndim, Tdof )
        dimensions = (/ dof, dof, ndim, ndim, Tdof, Tdof  /)
        call vectorMulBigBlockTransposed( elem%bigBlock(0) , y, dimensions, z )

        accum(1:dofAll) = accum(1:dofAll) + z%copyTo1Darray()

           !print*, 'norm of bigBlock multiplication = ', norm2( accum(1:dofAll) )
           !accum(1:dofAll) = matmul(elem%blockST(0)%Mb(1:dofAll, 1:dofAll), x(iss: iss+dofAll-1) )
           !print*, 'norm of blockST multiplication = ', norm2( accum(1:dofAll) )

        call y%delete()
        call z%delete()


        !! off-diagonal blocks
          do j=1,elem%flen
            k = elem%face(neigh,j)

            if(k > 0) then
              elem1 => grid%elem(k)
              dof1 = DOFtriang( elem1%deg + p_mod )
              Tdof1 = elem1%Tdof + q_mod

              is1 = elem1%bigNcv(p_mod, q_mod)
              dofAll1 = dof1 * Tdof1 * ndim

              ! local index of elem as an neighbor of elem1
              loc_neigh = elem%face(nei_i, j)

              call y%initFrom1DArray_mat_order( x(is1: is1+dofAll1-1), dof1, ndim, Tdof1 )
!              dimensions = (/ dof, dof1, ndim, ndim, Tdof, Tdof1 /)
              dimensions = (/ dof1, dof, ndim, ndim, Tdof1, Tdof /)
              !print*, "dimensions:" , elem%i,  elem%deg, elem1%deg, dof, dof1

              call vectorMulBigBlockTransposed( elem1%bigBlock(loc_neigh), y, dimensions, z )

              accum(1:dofAll) = accum(1:dofAll) + z%copyTo1Darray()

              !print*, 'norm of bigBlock multiplication = ', j,  norm2( accum(1:dofAll) )
              !accum(1:dofAll) = matmul(elem%blockST(j)%Mb(1:dofAll, 1:dofAll1), x(is1: is1+dofAll1-1) )
              !print*, 'norm of blockST multiplication = ', j , norm2( accum(1:dofAll) )
              call y%delete()
              call z%delete()

             endif
          enddo !j

          b(iss: iss+dofAll-1) = accum(1:dofAll)
      enddo !i

      deallocate(accum)
    class default
      stop 'bMVprodBIG_Dual only for STDG method'

    end select
    end associate

  end subroutine bMVprodBIG_Dual


  !> block matrix - vector product: \f$ b = (\eta M+ C_k) x \f$ using MASK
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass multiplied by state%time%refTimeMatrix,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).blockST(*),
  !> \f$ \eta = 1/\tau_k\f$ is external
  !> uses the 6-dimensional matrix blocks elem%bigBlock(:)
  subroutine bMVprodBIGMask(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer :: elem , elem1 ! one element
    integer :: i, j, k,   m, n, mm, nn
    integer :: dof, dof1, ndof, ndof1, Tdof, iss, is1, Tdof1, dofAll1
    real, dimension(:),allocatable :: accum, accum1
    integer :: p_mod, q_mod,bigNSize, max_dof, max_Tdof, dofAll
    integer, dimension(1:6) :: dimensions
    type(Elemental3_t) :: y, z

    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize( p_mod , q_mod )) &
      stop 'wrong size of vectors in bMVprodBIG!'

    !print*,'eta= ',eta

    bigNSize = state%bigNSize(p_mod, q_mod)

    max_dof = DOFtriang( DegFromDofTriang(state%space%max_dof) + p_mod )
    max_Tdof = state%time%max_Tdof + q_mod

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

       ! allocate accum once to accomodate for the largest dof.
       allocate( accum( max_dof*max_Tdof*ndim) )
       allocate( accum1( max_dof*max_Tdof*ndim) )

       do i=1,grid%nelem

        !print*, "b in the beginning = ", norm2(b)
        accum(:) = 0.0
        elem => grid%elem(i)

        dof = DOFtriang( elem%deg + p_mod )
        Tdof = elem%Tdof + q_mod
        ndof = dof * ndim
        dofAll = ndof * Tdof

        iss = elem%bigNcv(p_mod, q_mod)

        if (eta /= 0.) then
           if ( q_mod > 0 ) &
                stop "bMVprodBIG not implemented for q_mod > 0"
           do m =1,Tdof
              mm = (m-1) * ndof
              do n = 1,Tdof
                 nn = (n-1) * ndof
                 do k = 0,ndof-1,dof ! ???
                    accum(mm + k + 1 : mm+ k + dof) = accum(mm + k + 1 : mm+ k + dof) + &
                         eta * time%refTimeMatrix%Mb(m,n) * &
                         matmul(elem%Mass%Mb(1:dof, 1:dof) , x(iss+nn+k : iss+nn+k+dof-1))
                 enddo !k

              enddo !n
           enddo !m

        endif

! TODO add if clause for eta
!          else


        ! diagonal block
        ! x -> y(1:dof, 1:ndim, 1:tdof)
        !          print*, 'iss atd = ', iss, dofAll, dof, Tdof
        !print*, "norm x = " , x(iss: iss+dofAll-1)
        !          print*, 'size of X = ' , size(x)
!MASK        call y%initFrom1DArray_mat_order( x(iss: iss+dofAll-1), dof, ndim, Tdof )
        !          print*, 'xxx = ' , x !x(iss: iss+dofAll-1)

        dimensions = (/ dof, dof, ndim, ndim, Tdof, Tdof  /)

        !          call z%init( dof, ndim, Tdof)
        !          print*, 'diagonal block init', dof, ndim, Tdof
        !          print*, 'size z%x = ', size(z%x)

!MASK        call vectorMulBigBlock( elem%bigBlock(0) , y, dimensions, z )
        ! for eta /= 0 there is already the part from the mass matrix
        !print*, "accum mass = ", norm2(accum)
!MASK        accum(1:dofAll) = accum(1:dofAll)  + z%copyTo1Darray()
        !print*, "accum 1 = ", norm2(accum)


        !if( norm2(z%copyTo1Darray()) > 0. .and. elem%deg >= 2) then
           !write(*,'(8i5)') dofAll, dof, dof, ndim, ndim, Tdof, Tdof 
           !write(*,'(a6,i5,400es12.4)') 'vMBBs:',dofAll, accum(1:dofAll) 
           !write(*,'(a6,i5,400es12.4)') 'vMBBs:',elem%i, z%copyTo1Darray()

        call vectorMulBigBlockMask(elem%bigBlock(0), dimensions, &
             x(iss: iss+dofAll-1), dofAll,  accum1(1:dofAll) , dofAll)

        accum(1:dofAll) = accum(1:dofAll)  +  accum1(1:dofAll)   !!z%copyTo1Darray()
                
        
           !write(*,'(a6,i5,400es12.4)') 'vMBB##:',dofAll, accum1(1:dofAll) 
           !write(*,'(a6,3i5,400es12.4)') 'vMBB D:',elem%i,0,dofAll, norm2(accum1(1:dofAll) -  z%copyTo1Darray())
           !print*
           !stop
        !endif
        
        

!MASK        call y%delete()
!MASK        call z%delete()

!          endif

        !print*, 'size x', size(x)

        !! off-diagonal blocks
        do j=1,elem%flen
           k = grid%elem(i)%face(neigh,j)

           if(k > 0) then
              elem1 => grid%elem(k)
              dof1 = DOFtriang( elem1%deg + p_mod )
              Tdof1 = elem1%Tdof + q_mod

              is1 = elem1%bigNcv(p_mod, q_mod)
              dofAll1 = dof1 * Tdof1 * ndim

!MASK              call y%initFrom1DArray_mat_order( x(is1: is1+dofAll1-1), dof1, ndim, Tdof1 )
              dimensions = (/ dof, dof1, ndim, ndim, Tdof, Tdof1 /)
              !              call z%init( dof, ndim, Tdof )
              !              print*, 'neigbour elements'
!MASK              call vectorMulBigBlock( elem%bigBlock(j) , y, dimensions, z )
              !              if (i==2) then
              !                print*, "1d zzz = ", z%copyTo1Darray()
              !                print*, "old ver = ", matmul(elem%blockST(j)%Mb(1:dofAll, 1:dofAll1), x(is1: is1+dofAll1-1) )
              !              endif

!MASK              accum(1:dofAll) = accum(1:dofAll) + z%copyTo1Darray()
              !print*, "accum ", i,j, norm2(accum)


              !if( norm2(z%copyTo1Darray()) > 0. .and. elem%deg >= 2) then
                 !write(*,'(8i5)') dofAll, dof, dof, ndim, ndim, Tdof, Tdof 
                 !write(*,'(a6,i5,400es12.4)') 'vMBBs:',dofAll, accum(1:dofAll) 
                 !write(*,'(a6,i5,400es12.4)') 'vMBBs:',elem%i, z%copyTo1Darray()

              call vectorMulBigBlockMask(elem%bigBlock(j), dimensions, &
                   x(is1: is1+dofAll1-1), dofAll1,  accum1(1:dofAll) , dofAll)
              
              accum(1:dofAll) = accum(1:dofAll) + accum1(1:dofAll) 


              !write(21,'(a6,i5,400es12.4)') 'vMBB##:',dofAll, accum1(1:dofAll) 
                 !write(*,'(a6,3i5,400es12.4)') 'vMBB O:',elem%i, j, dofAll1, norm2(accum1(1:dofAll) -  z%copyTo1Darray())
                 !print*
                 !stop
              !endif

              
              !print*, 'norm of bigBlock multiplication = ', j,  norm2( accum(1:dofAll) )
              !accum(1:dofAll) = matmul(elem%blockST(j)%Mb(1:dofAll, 1:dofAll1), x(is1: is1+dofAll1-1) )
              !print*, 'norm of blockST multiplication = ', j , norm2( accum(1:dofAll) )

!MASK              call y%delete()
!MASK              call z%delete()

             endif
          enddo !j

          b(iss: iss+dofAll-1) = accum(1:dofAll)
          !!!write(*,'(a8, 2i6, 300es12.4)') 'matrix:',iss, iss+dofAll-1, accum(1:dofAll)
          !print*, "elem ", i, "norm b = ", norm2(accum(1:dofAll))
       enddo !i

       deallocate(accum)
       deallocate(accum1)
    class default
      stop 'bMVprodST only for STDG method'

    end select
    end associate

  end subroutine bMVprodBIGMask




  
  !> localization of a long verctor to each element part
  !> uses the 6-dimensional matrix blocks elem%bigBlock(:)
  subroutine LocalizeBIG(nsize, x, est)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:grid%nelem), intent(inout) :: est
    class(element), pointer :: elem
    integer :: i, dof,  ndof, Tdof, iss, dofAll, p_mod, q_mod, bigNsize


    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize( p_mod , q_mod )) &
         stop 'wrong size of vectors in bMVprodBIG!'

    !bigNSize = state%bigNSize(p_mod, q_mod)

    associate( time => state%time )
      select type( time )
      type is ( TimeTDG_t )

         do i=1,grid%nelem
            elem => grid%elem(i)

            dof = DOFtriang( elem%deg + p_mod )
            Tdof = elem%Tdof + q_mod
            ndof = dof * ndim
            dofAll = ndof * Tdof

            iss = elem%bigNcv(p_mod, q_mod)

            est(i) = sum( x(iss: iss+dofAll-1) )
            !print*,'#E#E#:', iss, iss+dofAll-1, nsize, est(i)

         enddo
      end select
    end associate
  end subroutine LocalizeBIG


 !> regularization of the large vector assembled from the STDG solution
 !> p_0 term is updated b small factor
 subroutine RegularizeVectorST_from_wST(b)
   real, dimension(:), intent(inout) :: b
   class(element), pointer:: elem ! one element
   integer  :: kk, i, k, j
   real :: eps = 1E-12

   kk = 0
   do i = 1, grid%nelem
      elem => grid%elem(i)
      do k = 1, elem%Tdof
         do j = 1, ndim
            ! only P_0 term
            b( kk + 1 : kk + 1) = b( kk + 1 : kk + 1)  + eps
            kk = kk + elem%dof
         enddo !j
      enddo !k
   enddo !i
   if(kk /= size(b, 1)) stop "Trouble in  FillVectorST_from_wST"

 end subroutine RegularizeVectorST_from_wST




  SUBROUTINE MGbMVprod(b,x,nsize)
  ! Block matrix-vector product in pMG cycle.
    use matrix_oper_int

    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem,elem1 ! one element
!    integer :: i,j,k, ndof, ndof1, is, is1 !, l_accum
    !integer :: i,j,k, dof, ndof, is,is1, dof1,ndof1, is2
    integer:: i,j,k,ki,kj,dof,ndof,dof1,ndof1,MGdof,MGdof1,MGndof,MGndof1,MGis,MGis1
    real, dimension(:),allocatable :: accum
    !real,dimension(:,:),allocatable :: mtxdummy
    !real, dimension(:),allocatable, save:: accum

    ! allocate accum once to accomodate for the largest dof.
    allocate(accum(sum(grid%elem(:)%MGdof) * ndim ) )
    !l_accum= maxval(grid%elem%dof) * ndim
    !if(size(accum) <= l_accum) then
    !   deallocate(accum)
    !   allocate(accum(1:l_accum))
    !endif

    do  i=1,grid%nelem,1

        accum(:)=0.

        elem => grid%elem(i)

        MGdof = elem%MGdof  ! povodne ndof1
        MGndof = ndim*MGdof   != elem%MGdof * ndim
        MGis = elem%MGncv

        dof = elem%dof
        ndof = ndim*dof

        !! diagonal blocks
        if( eta /= 0.) then
            do  k = 0,ndof-1,dof
                accum(k+1:k+MGdof) = &
                    eta * matmul(elem%Mass%Mb(1:MGdof, 1:MGdof),x(MGis+k: Mgis+k+MGdof-1))
            end do
        end if

        !FIXME - nasobenie v rezime MG
        !accum(1:ndof) = accum(1:ndof) &
        !    + matmul(elem%block(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )
        do  ki=0,ndim-1,1
            do  kj=0,ndim-1,1
                accum(ki*MGdof+1:(ki+1)*MGdof) = accum(ki*MGdof+1:(ki+1)*MGdof) &
                    + matmul( &
                    elem%block(0)%Mb(ki*dof+1:ki*dof+MGdof, kj*dof+1:kj*dof+MGdof), &
                    x(MGis+kj*MGdof: MGis+(kj+1)*MGdof-1) &
                    )
            end do
        end do


       !! off-diagonal blocks
       do j=1,elem%flen
          k = grid%elem(i)%face(neigh,j)

          if(k > 0) then
             elem1 => grid%elem(k)
             MGdof1 = elem1%MGdof
             MGndof1 = ndim*dof1
             MGis1 = elem1%MGncv
             dof1 = elem1%dof

             !FIXME - nasobenie v rezime MG
             !accum(1:ndof) = accum(1:ndof) &
             !     + matmul(elem%block(j)%Mb(1:ndof, 1:ndof1), x(is1: is1+ndof1-1) )
              do  ki=0,ndim-1,1
                  do  kj=0,ndim-1,1
                      accum(ki*MGdof+1:(ki+1)*MGdof) = accum(ki*MGdof+1:(ki+1)*MGdof) &
                          + matmul( &
                          elem%block(j)%Mb(ki*dof+1:ki*dof+MGdof, kj*dof1+1:kj*dof1+MGdof1), &
                          x(MGis1+kj*MGdof: MGis1+(kj+1)*MGdof1-1) &
                          )
                  end do
             end do
          endif
       enddo
       b(MGis: MGis+MGndof-1) = accum(1:MGndof)
    enddo

    deallocate(accum)

  END SUBROUTINE MGbMVprod

subroutine MGbMVprodDiag(b,x,nsize)
! Block matrix-vector product in pMG cycle.
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem
    integer :: i, ndof, is


    do i=1,grid%nelem
       elem => grid%elem(i)

       ndof = ndim * elem%MGdof  != elem%MGdof * ndim,  povodne ndof1
       is = elem%MGncv

       b(is: is+ndof-1) = matmul(elem%ILU(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )
       !b(is: is+ndof-1) =  x(is: is+ndof-1)
    enddo


  end subroutine MGbMVprodDiag


  subroutine MGbMVprod2(b,x,nsize)
  ! Block matrix-vector product in pMG cycle.
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem,elem1 ! one element
!    integer :: i,j,k, ndof, ndof1, is, is1 !, l_accum
    integer :: i,j,k, dof, ndof, is,is1, dof1,ndof1, is2
   !integer:: i,j,k,dof,ndof,dof1,ndof1,MGdof,MGdof1,MGndof,MGndfo1,is1,is2
    real, dimension(:),allocatable :: accum
    !real, dimension(:),allocatable, save:: accum

    ! allocate accum once to accomodate for the largest dof.
    allocate(accum(maxval(grid%elem%dof) * ndim ) )
    !l_accum= maxval(grid%elem%dof) * ndim
    !if(size(accum) <= l_accum) then
    !   deallocate(accum)
    !   allocate(accum(1:l_accum))
    !endif

    accum(:)=0.

    do  i=1,grid%nelem,1
        elem => grid%elem(i)

        dof = elem%MGdof  ! povodne ndof1
        ndof = ndim*dof   != elem%MGdof * ndim
        is = elem%MGncv

        !! diagonal blocks
        if( eta /= 0.) then
            do  k = 0,ndof-1,dof
                accum(k+1:k+dof) = &
                    eta * matmul(elem%Mass%Mb(1:dof, 1:dof),x(is+k: is+k+dof-1))
            end do
        end if

        !FIXME - nasobeniev rezime MG
        !accum(1:ndof) = accum(1:ndof) &
        !    + matmul(elem%block(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )
        do  k=0,ndof-1,dof
            accum(k+1:k+dof) = &
                matmul(elem%block(0)%Mb(1:dof, 1:dof), x(is+k: is+k+dof-1))
        end do


       !! off-diagonal blocks
       do j=1,elem%flen
          k = grid%elem(i)%face(neigh,j)

          if(k > 0) then
             elem1 => grid%elem(k)
             dof1 = elem1%MGdof
             ndof1 = dof1 * ndim
             is1 = elem1%MGncv

             !FIXME - nasobenie v rezime MG
             !accum(1:ndof) = accum(1:ndof) &
             !     + matmul(elem%block(j)%Mb(1:ndof, 1:ndof1), x(is1: is1+ndof1-1) )
              do  k=0,ndof-1,dof
                  accum(k+1:k+dof) = &
                      matmul(elem%block(j)%Mb(1:dof, 1:dof1), x(is1+k: is1+k+dof1-1))
             end do
          endif
       enddo
       b(is: is+ndof-1) = accum(1:ndof)
    enddo

    deallocate(accum)

  end subroutine MGbMVprod2


  !> double product by J. Hajek
  subroutine bMVprod2(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    real, dimension(1:nsize):: bm
    integer:: n

    n = nsize/2

    call bMVprod(b,x,n)
    call bMVprod(b(1+n),x(1+n),n)
    call bMVmassprod(bm,x,n)
    call bMVmassprod(bm(1+n),x(1+n),n)
    b(:n) = b(:n) + iota(1) * bm(n+1:)
    b(n+1:) = b(n+1:) + iota(2) * bm(:n)
  end subroutine bMVprod2

  !> diagonal block matrix - vector product: \f$ b = Mx \f$,  \f$ M  \f$ is the block
  !> mass matrix grid.elem(*).Mass
  subroutine bMVmassprod(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem ! one element
    integer :: i, ndof, is

    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof = elem%dof * ndim
       is = elem%ncv

       ! FIXME
       ! diagonal block
       b(is: is+ndof-1) = matmul(elem%Mass%Mb, x(is: is+ndof-1) )
    enddo
  end subroutine bMVmassprod



  !> block matrix - vector product: \f$ b = Ax \f$,
  !>   \f$ A  \f$ is the block matrix grid.elem(*).block(*)
  subroutine bMVprodA(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem,elem1 ! one element
    integer :: i,j,k,dof,  ndof, ndof1, is, is1

    real, dimension(:),allocatable:: accum

    ! allocate accum once to accomodate for the largest dof.
    allocate(accum(maxval(grid%elem%dof) * ndim))


    do i=1,grid%nelem
       elem => grid%elem(i)
       dof =  elem%dof
       ndof = dof * ndim
       is = elem%ncv

       ! diagonal block
       accum(1:ndof) = matmul(elem%block(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )

       !! off-diagonal blocks
       do j=1,elem%flen
          k = grid%elem(i)%face(neigh,j)

          if(k > 0) then
             elem1 => grid%elem(k)
             ndof1 = elem1%dof * ndim
             is1 = elem1%ncv

             accum(1:ndof) = accum(1:ndof)  &
                  + matmul(elem%block(j)%Mb(1:ndof, 1:ndof1),  x(is1: is1+ndof1-1) )
          endif
       enddo
       b(is: is+ndof-1) = accum(1:ndof)
    enddo

    deallocate(accum)

  end subroutine bMVprodA

  !> block matrix - vector product: \f$ b = Ax \f$,
  !>   \f$ A  \f$ is the block matrix grid.elem(*).block(*)
  !> ONLY OFF DIAGONAL TERMS
  subroutine bMVprodOffC(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem,elem1 ! one element
    integer :: i,j,k,dof,  ndof, ndof1, is, is1

    real, dimension(:),allocatable:: accum

    ! allocate accum once to accomodate for the largest dof.
    allocate(accum(maxval(grid%elem%dof) * ndim))


    !    print*,'B1'
    do i=1,grid%nelem
       elem => grid%elem(i)
       dof =  elem%dof
       ndof = dof * ndim
       is = elem%ncv

       accum(1:ndof) = 0.

       !! off-diagonal blocks
       do j=1,elem%flen
          k = grid%elem(i)%face(neigh,j)

          if(k > 0) then
             elem1 => grid%elem(k)
             ndof1 = elem1%dof * ndim
             is1 = elem1%ncv

             accum(1:ndof) = accum(1:ndof)  &
                  + matmul(elem%block(j)%Mb(1:ndof, 1:ndof1),  x(is1: is1+ndof1-1) )
          endif
       enddo
       b(is: is+ndof-1) = accum(1:ndof)
    enddo

    deallocate(accum)

  end subroutine bMVprodOffC


  !> evaluation of the Weighted Time Dependent residuum
  !> block matrix grid%elem(*)%block(*) + eta* grid%elem(*)%Mass(*)
  !> right-hand-side  b
  !> solution  x,
  !> res_new_old = (old residuum - new_residuum)
  subroutine EvalWeightTDresid(b, x, nsize, residuum, res_new_old, ires_no )
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x, b
    real, intent(inout) :: residuum, res_new_old
    integer, intent(in) :: ires_no  ! = 1 => compute res_new_old
    class(element), pointer:: elem,elem1 ! one element
    integer :: i,j,k, ndof, ndof1, is, is1
    real, dimension(:),allocatable:: accum
    real :: max_res

    ! allocate accum once to accomodate for the largest dof.
    allocate(accum(maxval(grid%elem%dof) * ndim))

    max_res = 0.
    residuum = 0.
    res_new_old = 0.

    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof1 = elem%dof
       ndof = elem%dof * ndim
       is = elem%ncv

       ! diagonal block
       if (eta /= 0.) then
          do k = 0,ndof-1,ndof1
             accum(k+1:k+ndof1) = eta &
                  * matmul(elem%Mass%Mb(1:ndof1, 1:ndof1), x(is+k: is+k+ndof1-1))
          enddo
          accum(1:ndof) = accum(1:ndof) &
               + matmul(elem%block(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )
       else
          accum(1:ndof) = matmul(elem%block(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )
       endif


       !! off-diagonal blocks
       do j=1,elem%flen
          k = grid%elem(i)%face(neigh,j)

          if(k > 0) then
             elem1 => grid%elem(k)
             ndof1 = elem1%dof * ndim
             is1 = elem1%ncv

             accum(1:ndof) = accum(1:ndof) &
                  + matmul(elem%block(j)%Mb(1:ndof, 1:ndof1), x(is1: is1+ndof1-1) )
          endif
       enddo

       accum(1:ndof) = (accum(1:ndof) - b(is: is+ndof-1) )/ elem%area

       ! linear algebra residuum
       residuum = residuum + dot_product(accum(1:ndof), accum(1:ndof))

       ! difference od the old and new linear algebra residuum
       if(ires_no == 1) &
            res_new_old = res_new_old &
            + dot_product(accum(1:ndof) - elem%vec(res_vec,1:ndof), &
            accum(1:ndof) - elem%vec(res_vec,1:ndof))

       elem%vec(res_vec,1:ndof) = accum(1:ndof)

    enddo

    deallocate(accum)

    residuum = (residuum)**0.5 / state%space%domain_volume

    if(ires_no == 1) &
         res_new_old = (res_new_old)**0.5  / state%space%domain_volume

    !print*,'End of EvalWeightTDresid'

  end subroutine EvalWeightTDresid

  function EvalSSresid()
    real :: EvalSSresid
    class(element), pointer:: elem,elem1 ! one element
    integer :: i, j, k, ndof, ndof1

    real, dimension(:),allocatable:: accum

    ! allocate accum once to accomodate for the largest dof.
    allocate(accum(maxval(grid%elem%dof) * ndim))

    !    print*,'B1'
    EvalSSresid = 0.
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof = elem%dof  * ndim

       ! diagonal block
       accum(1:ndof) = matmul(elem%block(0)%Mb(1:ndof, 1:ndof), elem%w(0,1:ndof) )

       !! off-diagonal blocks
       do j=1,elem%flen
          k = grid%elem(i)%face(neigh,j)

          if(k > 0) then
             elem1 => grid%elem(k)
             ndof1 = elem1%dof * ndim

             accum(1:ndof) = accum(1:ndof)  &
                  + matmul(elem%block(j)%Mb(1:ndof, 1:ndof1), elem1%w(0,1:ndof1) )
          endif
       enddo

       accum(1:ndof) = (accum(1:ndof) - elem%vec(rhs,1:ndof))/elem%area

       EvalSSresid = EvalSSresid + dot_product(accum(1:ndof), accum(1:ndof))


       !do j=1,ndim
       !write(*, '(a4,i5,20es12.4)') 'impl', i, accum(1:min(ndof,6) )

         !write(100+state%time%iter, '(2i5,20es12.4)') &
               !i, j, accum((j-1)*elem%dof+1 : j*elem%dof)
       !enddo
       !write(100+state%time%iter, '(i5,25es12.4)') i, elem%xc(1:nbDim), abs(accum(1:ndof))

    enddo


!    print*,'@@@',state%nsize, state%space%domain_volume, (EvalSSresid)**0.5 / state%space%domain_volume, &
!         (EvalSSresid/state%nsize)**0.5

    !EvalSSresid = (EvalSSresid)**0.5 / state%space%domain_volume
    EvalSSresid = (EvalSSresid/state%nsize)**0.5
    deallocate(accum)

  end function EvalSSresid

  function EvalSSresidExplicit()
    real :: EvalSSresidExplicit
    class(element), pointer:: elem ! one element
    integer :: i, k, dofP, dof,l1, l2

    EvalSSresidExplicit = 0.

    do i=1,grid%nelem
       elem => grid%elem(i)
       dof = elem%dof
       dofP = elem%dof_plus

       do k=1 , ndim
          l1 = (k-1)*dofP + 1
          l2 = (k-1)*dofP + dof

          EvalSSresidExplicit = EvalSSresidExplicit &
               + dot_product(elem%vec(rhs,l1:l2), elem%vec(rhs,l1:l2))/elem%area**2

          !write(200+state%time%iter_loc, '(20es12.4)') elem%xc(:), &
          !     dot_product(elem%vec(rhs,l1:l2), elem%vec(rhs,l1:l2))/elem%area**2
       enddo
    enddo

    !EvalSSresidExplicit = (EvalSSresidExplicit)**0.5 / state%space%domain_volume
    EvalSSresidExplicit = sqrt(EvalSSresidExplicit) / state%nsize

    !print*,'AA:', EvalSSresidExplicit

  end function EvalSSresidExplicit


  !> NO preconditioner: \f$ b = x \f$
  subroutine bMVnull(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b

    b(1:nsize) = x(1:nsize)
  end subroutine bMVnull

  !> diagonal block matrix - vector product: \f$ b = Ax \f$,  \f$ A  \f$ is the block
  !> matrix grid.elem(*).block(*)
  subroutine bMVdiagprod(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem ! one element
    integer :: i, ndof, is

!    print*,'B1'
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof = elem%dof * ndim
       is = elem%ncv

       ! diagonal block
       b(is: is+ndof-1) = matmul(elem%ILU(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )

       !!call WriteMblock(elem%ILU(0) )
    enddo

  end subroutine bMVdiagprod

  !> diagonal block matrix - vector product for STDGM: \f$ b = Ax \f$,  \f$ A  \f$ is the block
  !> matrix grid.elem(*).block(*)
  subroutine bMVdiagprodST(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem ! one element
    integer :: i, ndof, is

!    print*,'B1'
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof = elem%dof * elem%Tdof * ndim
       is = elem%ncv

       ! diagonal block
       b(is: is+ndof-1) = matmul(elem%ILU(0)%Mb(1:ndof, 1:ndof), x(is: is+ndof-1) )

       !!call WriteMblock(elem%ILU(0) )
    enddo

  end subroutine bMVdiagprodST

    !> diagonal block matrix - vector product for Dual problem for STDGM: \f$ b = A^T * x \f$,  \f$ A  \f$ is the block
  !> matrix grid.elem(*).blockST(*)
  subroutine bMVdiagprodST_Dual(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    class(element), pointer:: elem ! one element
    integer :: i, ndof, iv

!    print*,'B1'
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof = elem%dof * elem%Tdof * ndim
       iv = elem%ncv

       ! diagonal block - blockST is transposed for dual problem
       b(iv: iv+ndof-1) = matmul( x(iv: iv+ndof-1) , elem%ILU(0)%Mb(1:ndof, 1:ndof)  )

       !!call WriteMblock(elem%ILU(0) )
    enddo

  end subroutine bMVdiagprodST_Dual


  subroutine bMVdiagprod2(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    integer:: n
    n = nsize/2
    call bMVdiagprod(b,x,n)
    call bMVdiagprod(b(1+n),x(1+n),n)
  end subroutine bMVdiagprod2

  !> "preconditioned" norm of the vector \f$ \| x \|_P = \| Px\|_{\ell 2}\f$,
  !> where \f$ P \f$ is the actual matrix of ILU decomposition
  function VectorPrecondNorm(x)
    real :: VectorPrecondNorm
    real, dimension(:), intent(in) :: x
    real, dimension(:), allocatable :: y
    integer :: nsize

    nsize = size(x)
    allocate(y(1:nsize) )

!    if (state%dual) &
!      stop 'Vector precond norm not implemented for preconditioned Dual problem'

    if ( state%time%disc_time == 'STDG' ) then
       call bMViLUprodBIG(y ,x, nsize) ! FR_CONTROL
    else
       call bMViLUprod(y, x ,nsize)
    endif
    VectorPrecondNorm = sqrt( dot_product(y(:), y(:) ))

    deallocate(y)

  end function VectorPrecondNorm

  !> "preconditioning"  the vector \f$  x  \Rightarrow  Px \f$,
  !> where \f$ P \f$ is the actual matrix of ILU decomposition
  subroutine VectorPrecond(x, y)
    real, dimension(:), intent(in) :: x
    real, dimension(:), intent(out) :: y
    integer :: nsize

    nsize = size(x)
    if(nsize /= size(y) ) stop "bad sizes in VectorPrecond"

    if ( state%time%disc_time == 'STDG' ) then
       call bMViLUprodST( y, x, nsize )
    else
       call bMViLUprod(y, x ,nsize)
    endif

    !VectorPrecondNorm = dot_product(y(:), y(:) )**0.5
    !deallocate(y)

  end subroutine VectorPrecond

  !> "scaled" norm of the vector \f$ \| x \|_S = \| Sx\|_{\ell 2}\f$,
  !> where \f$ S \f$ is a scaling matrix
  function VectorScaleNorm(x)
    real :: VectorScaleNorm
    real, dimension(:), intent(in) :: x
    real, dimension(:), allocatable :: y
    class(element), pointer:: elem ! one element
    integer :: i, j, k, dof, is, is1
    real :: Re1

    Re1 = 0.
    if(state%model%Re > 0) Re1 = 1./state%model%Re
    allocate(y(1:state%space%max_dof) )

    VectorScaleNorm = 0.

    do i=1,grid%nelem
       elem => grid%elem(i)

       dof = elem%dof
       do k=1, ndim
          is  = elem%ncv + (k-1)*ndim
          is1 = is + dof - 1
          y(1:dof) = x(is:is1)  !/ (2*elem%area)
          do j=1,dof

             y(j) = y(j) / (elem%Mass%Mb(j,j) + elem%Stiff%Mb(j,j) * Re1 )**0.5
             !y(j) = y(j) / (elem%Mass%Mb(j,j) + elem%Stiff%Mb(j,j) )**0.5
          enddo
          VectorScaleNorm = VectorScaleNorm + dot_product( y(1:dof), y(1:dof) )

       enddo
    enddo

    !print*,'####',VectorScaleNorm, smaz, VectorScaleNorm / smaz

    VectorScaleNorm = VectorScaleNorm**0.5


    deallocate(y)

  end function VectorScaleNorm

  !> ILU preconditioning: \f$ b = (LU)^{-1}x \f$,  \f$ LU  \f$ is the incomplete LU block
  !> preconditioner having the same structure as matrix
  subroutine bMViLUprod(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    real, dimension(:), allocatable :: y
    class(element), pointer:: elem, elem1 ! one element
    type(Mblock) :: Loc

    integer :: i, ndof, is, j, i1, ndof1, is1, ii

    call InitMblock(Loc, grid%elem(1)%dof * ndim, grid%elem(1)%dof * ndim)

    allocate(y(1:nsize) )

    !! L solution
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof = elem%dof * ndim
       is = elem%ncv

       y(is: is+ndof-1) = x(is: is+ndof-1)

       do j=1,elem%flen
          i1 = elem%face(neigh,j)
          if(i1 > 0 .and. i1 < i) then
             elem1 => grid%elem(i1)
             ndof1 = elem1%dof * ndim
             is1 = elem1%ncv

             y(is: is+ndof-1) = y(is: is+ndof-1) &
                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), y(is1: is1+ndof1-1) )

             if(is1 > is) print*,'%%%%%%%%%%%%%%%%%%%%%%%???',is,is1, i,i1

          endif
       enddo
    enddo

    !! U solution
    do ii=1,grid%nelem
       i = grid%nelem - ii + 1

       elem => grid%elem(i)
       ndof = elem%dof * ndim
       is = elem%ncv

       do j=1,elem%flen
          i1 = elem%face(neigh,j)

          if( i1 > i) then
             elem1 => grid%elem(i1)
             ndof1 = elem1%dof * ndim
             is1 = elem1%ncv

             y(is: is+ndof-1) = y(is: is+ndof-1) &
                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), b(is1: is1+ndof1-1) )

             if(is1 < is) print*,'UUU%%%%%%%%%%%%%%%%%%%%%%%???',is,is1, i,i1
          endif
       enddo

       if(ndof .ne. size(Loc%Mb,1)) then
          deallocate (Loc%Mb)
          call InitMblock(Loc, ndof, ndof)
       endif

       Loc%Mb(1:ndof,1:ndof) = grid%elem(i)%ILU(0)%Mb(1:ndof,1:ndof)
       call MblockInverse(ndof, Loc%Mb)

       b(is: is+ndof-1) = matmul(Loc%Mb(1:ndof,1:ndof), y(is: is+ndof-1) )

    enddo


    deallocate (Loc%Mb)
    deallocate(y)

  end subroutine bMViLUprod

  subroutine bMViLUprod2(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    integer:: n
    n = nsize/2
    call bMViLUprod(b,x,n)
    call bMViLUprod(b(1+n),x(1+n),n)
  end subroutine bMViLUprod2

  !> ILU preconditioning for STDGM: \f$ b = (LU)^{-1}x \f$,  \f$ LU  \f$ is the incomplete LU block
  !> preconditioner having the same structure as matrix
  subroutine bMViLUprodST(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    real, dimension(:), allocatable :: y
    class(element), pointer:: elem, elem1 ! one element
    type(Mblock) :: Loc
    integer :: i, ndof, is, j, i1, ndof1, is1, ii

    print*, "bMViLUprodST should not be used anymore, use bMViLUprodBIG instead!"

    ndof = grid%elem(1)%dof * grid%elem(1)%Tdof * ndim
    call InitMblock(Loc, ndof, ndof)


    allocate( y(1:nsize) )

    !! L solution - L^{-1}: for elem (row) in elem%ILU(:) are saved the columns of this row
    do i=1,grid%nelem ! go through the rows of the matrix
       elem => grid%elem(i)
       ndof = elem%dof * elem%Tdof * ndim
       is = elem%ncv
       ! there is 1 on the diagonal of L^(-1)
       y(is: is+ndof-1) = x(is: is+ndof-1)

       ! go through the columns
       do j=1,elem%flen
          i1 = elem%face(neigh,j)
          if(i1 > 0 .and. i1 < i) then ! elem1 is before elem in the matrix
             elem1 => grid%elem(i1)
             ndof1 = elem1%dof * elem1%Tdof * ndim
             is1 = elem1%ncv

!             print*, 'ndof,ndof1, is1:' , ndof,ndof1, is1
!             print*, size( elem%ILU(j)%Mb(:,1) ) , size(elem%blockST(j)%Mb(:,1))
!             print*, elem%ILU(j)%Mb(1:ndof, 1:ndof1)
!             print*, 'dfs'
!             print*,y(is1: is1+ndof1-1)
!             print*, y(is: is+ndof-1)

             y(is: is+ndof-1) = y(is: is+ndof-1) &
                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), y(is1: is1+ndof1-1) )

             if(is1 > is) print*,'Problem in bMViLUProdST',is,is1, i,i1

          endif
       enddo
    enddo

    !! U solution
    do ii=1,grid%nelem
       i = grid%nelem - ii + 1

       elem => grid%elem(i)
       ndof = elem%dof * elem%Tdof * ndim
       is = elem%ncv

       do j=1,elem%flen
          i1 = elem%face(neigh,j)

          if( i1 > i) then
             elem1 => grid%elem(i1)
             ndof1 = elem1%dof * elem1%Tdof * ndim
             is1 = elem1%ncv

             y(is: is+ndof-1) = y(is: is+ndof-1) &
                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), b(is1: is1+ndof1-1) )

             if(is1 < is) print*,'Problem in bMViLUProdST',is,is1, i,i1
          endif
       enddo

       if(ndof .ne. size(Loc%Mb,1)) then
          deallocate (Loc%Mb)
          call InitMblock(Loc, ndof, ndof)
       endif

       Loc%Mb(1:ndof,1:ndof) = grid%elem(i)%ILU(0)%Mb(1:ndof,1:ndof)
       call MblockInverse(ndof, Loc%Mb)

       b(is: is+ndof-1) = matmul(Loc%Mb(1:ndof,1:ndof), y(is: is+ndof-1) )

    enddo


    deallocate (Loc%Mb)
    deallocate(y)

  end subroutine bMViLUprodST


   !> ILU preconditioning for STDGM: \f$ b = (LU)^{-1}x \f$,  \f$ LU  \f$ is the incomplete LU block
  !> preconditioner having the same structure as matrix
  subroutine bMViLUprodBIG(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    real, dimension(:), allocatable :: y
    class(element), pointer:: elem, elem1 ! one element
    type(Mblock) :: Loc
    integer :: i, ndof, iss, j, i1, ndof1, is1, ii, max_dof, max_Tdof
    integer :: p_mod, q_mod

    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize(p_mod, q_mod) ) then
      print*, "Wrong nsize in bMViLUprodBIG: " , nsize, state%bigNSize(p_mod, q_mod), p_mod
      stop
    endif


    max_dof = DOFtriang( DegFromDofTriang(state%space%max_dof) + p_mod )
    max_Tdof = state%time%max_Tdof + q_mod
    ndof = max_dof * max_Tdof * ndim
    call InitMblock(Loc, ndof, ndof)

    allocate( y(1:nsize) )

    !! L solution - L^{-1}: for elem (row) in elem%ILU(:) are saved the columns of this row
    do i=1,grid%nelem ! go through the rows of the matrix
       elem => grid%elem(i)

       ndof = DOFtriang( elem%deg + p_mod) * &
          (elem%Tdof+q_mod) * ndim
       iss = elem%bigNcv(p_mod, q_mod) !%ncv
       ! there is 1 on the diagonal of L^(-1)
       y(iss: iss+ndof-1) = x(iss: iss+ndof-1)

       ! go through the columns
       do j=1,elem%flen
          i1 = elem%face(neigh,j)
          if(i1 > 0 .and. i1 < i) then ! elem1 is before elem in the matrix
             elem1 => grid%elem(i1)
             ndof1 = DOFtriang(elem1%deg + p_mod) * &
                      (elem1%Tdof+q_mod) * ndim
             is1 = elem1%bigNcv(p_mod, q_mod)
             if(is1 > iss) print*,'Problem in bMViLUProdBIG',iss,is1, i,i1

             y(iss: iss+ndof-1) = y(iss: iss+ndof-1) &
                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), y(is1: is1+ndof1-1) )

          endif
       enddo
    enddo

    !! U solution
    do ii=1,grid%nelem
       i = grid%nelem - ii + 1

       elem => grid%elem(i)
       ndof = DOFtriang( elem%deg + p_mod) * &
          (elem%Tdof+q_mod) * ndim
       iss = elem%bigNcv(p_mod, q_mod) !%ncv

       do j=1,elem%flen
          i1 = elem%face(neigh,j)

          if( i1 > i) then
             elem1 => grid%elem(i1)
             ndof1 = DOFtriang(elem1%deg + p_mod) * &
                      (elem1%Tdof+q_mod) * ndim
             is1 = elem1%bigNcv(p_mod, q_mod)
             if(is1 < iss) print*,'Problem in bMViLUProdBIG',iss,is1, i,i1

             y(iss: iss+ndof-1) = y(iss: iss+ndof-1) &
                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), b(is1: is1+ndof1-1) )

          endif
       enddo

       if(ndof .ne. size(Loc%Mb,1)) then
          deallocate (Loc%Mb)
          call InitMblock(Loc, ndof, ndof)
       endif

       Loc%Mb(1:ndof,1:ndof) = grid%elem(i)%ILU(0)%Mb(1:ndof,1:ndof)
       call MblockInverse(ndof, Loc%Mb)

       b(iss: iss+ndof-1) = matmul(Loc%Mb(1:ndof,1:ndof), y(iss: iss+ndof-1) )

    enddo

    deallocate (Loc%Mb)
    deallocate(y)

  end subroutine bMViLUprodBIG



  !> ILU preconditioning for STDGM DUAL PROBLEM: \f$ b = (LU)^{-T} x\f$,  \f$ LU  \f$ is the incomplete LU block
  !> we do not have L^{-1} neither , only implicitly as forward Gauss elimination)
  !> 1. solve: U^T z = x
  !> 2. multiply (G.e.): b = L^{-T} z
  !> the ordering of multiplication is reversed from the primal problem
  !> preconditioner having the same structure as matrix
  subroutine bMViLUprodST_Dual(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    real, dimension(:), allocatable :: z
    class(element), pointer:: elem, elem1 ! one element
    type(Mblock) :: Loc
    integer :: i, ii, ndof, dof, ncv, j, i1, ndof1, is, is1, loc_neigh

    print*, "bMViLUprodST_Dual should not be used anymore, use bMViLUprodBIG_Dual instead!"

    ndof = grid%elem(1)%dof * grid%elem(1)%Tdof * ndim
    call InitMblock(Loc, ndof, ndof)

    allocate( z(1:nsize), source = 0.0 )


    !! U^Tz=x solution -> z
    do i=1,grid%nelem
       !i = ii ! grid%nelem - ii + 1
       elem => grid%elem(i)
       ndof = elem%dof * elem%Tdof * ndim
       is = elem%ncv
       z(is: is+ndof-1) = x(is:is+ndof-1)

       do j=1,elem%flen
          i1 = elem%face(neigh,j)

          if( i1>0 .and. i1 < i) then

             elem1 => grid%elem(i1)
             ndof1 = elem1%dof * elem1%Tdof * ndim
             is1 = elem1%ncv
             loc_neigh = elem%face(nei_i, j)
               ! zi = zi - u_ij^T zj
             z(is: is+ndof-1) = z(is: is+ndof-1) &
                - matmul( z(is1:is1+ndof1-1), elem1%ILU(loc_neigh)%Mb(1:ndof1, 1:ndof) )
!               - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), b(is1: is1+ndof1-1) )

             if(is1 > is) print*,'Problem in bMViLUProdST_Dual',is,is1, i,i1
          endif
       enddo

       if(ndof .ne. size(Loc%Mb,1)) then
          deallocate (Loc%Mb)
          call InitMblock(Loc, ndof, ndof)
       endif

       Loc%Mb(1:ndof,1:ndof) = grid%elem(i)%ILU(0)%Mb(1:ndof,1:ndof)
       call MblockInverse(ndof, Loc%Mb)
       ! z_i = z_i / u_ii
       z(is: is+ndof-1) = matmul( z(is: is+ndof-1), Loc%Mb(1:ndof,1:ndof) )

    enddo

    ! multiplication: b = L^{-T} z / we do not have L^{-1} => forward Gauss elimination of z
    do ii=1,grid%nelem ! go through the COLUMNS of the matrix (backwards)
       i = grid%nelem - ii + 1
       elem => grid%elem(i)
       ndof = elem%dof * elem%Tdof * ndim
       is = elem%ncv
       ! there is 1 on the diagonal of L^(-1)
       b(is: is+ndof-1) = z(is: is+ndof-1)

       ! go through the ROWS
       do j=1,elem%flen
          i1 = elem%face(neigh,j)
          if(i1 > i) then ! elem1 is below elem in the matrix L
             elem1 => grid%elem(i1)
             ndof1 = elem1%dof * elem1%Tdof * ndim
             is1 = elem1%ncv
             loc_neigh = elem%face(nei_i, j) ! local index of elem as an neighbor of elem1

             b(is: is+ndof-1) = b(is: is+ndof-1) &
               - matmul( b(is1: is1+ndof1-1), elem1%ILU(loc_neigh)%Mb(1:ndof1, 1:ndof) ) ! dual
!                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), y(is1: is1+ndof1-1) ) ! from primal

             if(is1 < is) print*,'Problem in bMViLUProdST',is,is1, i,i1

          endif
       enddo
    enddo


    deallocate (Loc%Mb)
    deallocate(z)

  end subroutine bMViLUprodST_Dual


  !> ILU preconditioning for STDGM DUAL PROBLEM: \f$ b = (LU)^{-T} x\f$,  \f$ LU  \f$ is the incomplete LU block
  !> we do not have L^{-1}, only implicitly as forward Gauss elimination)
  !> 1. solve: U^T z = x
  !> 2. multiply (G.e.): b = L^{-T} z
  !> the ordering of multiplication is reversed from the primal problem
  !> preconditioner having the same structure as matrix
  subroutine bMViLUprodBIG_Dual(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    real, dimension(:), allocatable :: z
    class(element), pointer:: elem, elem1 ! one element
    type(Mblock) :: Loc
    integer :: i, ii, ndof, dof, ncv, j, i1, ndof1, iss, is1, loc_neigh
    integer :: max_dof, max_Tdof
    integer :: p_mod, q_mod

    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize(p_mod, q_mod) ) &
      & stop "Wrong nsize in bMViLUprodBIG_Dual"

    max_dof = DOFtriang( DegFromDofTriang(state%space%max_dof) + p_mod )
    max_Tdof = state%time%max_Tdof + q_mod
    ndof = max_dof * max_Tdof * ndim
    call InitMblock(Loc, ndof, ndof)

    allocate( z(1:nsize), source = 0.0 )

    !! U^Tz=x solution -> z
    do i=1,grid%nelem
       !i = ii ! grid%nelem - ii + 1
       elem => grid%elem(i)
       ndof = DOFtriang( elem%deg + p_mod) * &
          (elem%Tdof+q_mod) * ndim
       iss = elem%bigNcv(p_mod, q_mod) !%ncv

       z(iss: iss+ndof-1) = x(iss:iss+ndof-1)

       do j=1,elem%flen
          i1 = elem%face(neigh,j)
          if( i1>0 .and. i1 < i) then

             elem1 => grid%elem(i1)
             ndof1 = DOFtriang(elem1%deg + p_mod) * &
                      (elem1%Tdof+q_mod) * ndim
             is1 = elem1%bigNcv(p_mod, q_mod)
             loc_neigh = elem%face(nei_i, j)
               ! zi = zi - u_ij^T zj
             z(iss: iss+ndof-1) = z(iss: iss+ndof-1) &
                - matmul( z(is1:is1+ndof1-1), elem1%ILU(loc_neigh)%Mb(1:ndof1, 1:ndof) )
!               - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), b(is1: is1+ndof1-1) )

             if(is1 > iss) print*,'Problem in bMViLUProdBIG_Dual',iss,is1, i,i1
          endif
       enddo

       if(ndof .ne. size(Loc%Mb,1)) then
          deallocate (Loc%Mb)
          call InitMblock(Loc, ndof, ndof)
       endif

       Loc%Mb(1:ndof,1:ndof) = grid%elem(i)%ILU(0)%Mb(1:ndof,1:ndof)
       call MblockInverse(ndof, Loc%Mb)
       ! z_i = z_i / u_ii
       z(iss: iss+ndof-1) = matmul( z(iss: iss+ndof-1), Loc%Mb(1:ndof,1:ndof) )

    enddo

    ! multiplication: b = L^{-T} z / we do not have L^{-1} => forward Gauss elimination of z
    do ii=1,grid%nelem ! go through the COLUMNS of the matrix (backwards)
       i = grid%nelem - ii + 1
       elem => grid%elem(i)
       ndof = DOFtriang( elem%deg + p_mod) * &
          (elem%Tdof+q_mod) * ndim
       iss = elem%bigNcv(p_mod, q_mod) !%ncv
       ! there is 1 on the diagonal of L^(-1)
       b(iss: iss+ndof-1) = z(iss: iss+ndof-1)

       ! go through the ROWS
       do j=1,elem%flen
          i1 = elem%face(neigh,j)
          if(i1 > i) then ! elem1 is below elem in the matrix L
             elem1 => grid%elem(i1)
             ndof1 = DOFtriang(elem1%deg + p_mod) * &
                      (elem1%Tdof+q_mod) * ndim
             is1 = elem1%bigNcv(p_mod, q_mod)
             loc_neigh = elem%face(nei_i, j) ! local index of elem as an neighbor of elem1

             b(iss: iss+ndof-1) = b(iss: iss+ndof-1) &
               - matmul( b(is1: is1+ndof1-1), elem1%ILU(loc_neigh)%Mb(1:ndof1, 1:ndof) ) ! dual
!                  - matmul(elem%ILU(j)%Mb(1:ndof, 1:ndof1), y(is1: is1+ndof1-1) ) ! from primal

             if(is1 < iss) print*,'Problem in bMViLUProdBIG',iss,is1, i,i1

          endif
       enddo
    enddo

    deallocate (Loc%Mb)
    deallocate(z)

  end subroutine bMViLUprodBIG_Dual


  !>      null_precond  preconditioner
  subroutine null_precond(z,g,nsize,spars,irwst, idx)
    integer ::irwst(*), idx(*), nsize
    real z(*), g(*), spars(*)

    z(1:nsize)=g(1:nsize)
  end subroutine null_precond


  !>  Block Jacobi  preconditioner
  subroutine BlockJacobi(z,g,nsize,spars_prec,irwst, idx)
    integer :: i, j, iend, istart, irwst(*), idx(*), nsize
    real sum, z(*), g(*), spars_prec(*)


    !multiplication spars_prec*g

    do i=1,nsize
       istart=irwst(i)
       iend=irwst(i+1)-1
       !       print*,'###',i,istart,iend, j, idx(j),spars(j)
       sum=0.0
       do  j=istart,iend
          sum=sum+g(idx(j))*spars_prec(j)
          !print*,'###',i,istart,iend, j, idx(j),spars(j), sum
       enddo
       z(i)=sum
       !print*,'-----------------',i, sum
    enddo

  end subroutine BlockJacobi

  !> solution of \f$ (M+\tau C_k) x = b \f$ using Taylor serie,
  !> \f$ C_k = D_k + E_k \f$, $D_k is block diagonal part of \f$ C_k\f$
  subroutine TaylorSolution(nsize, x, b, tol, it, rezid, not_converge)
    !external prod
    integer, intent(in)  :: nsize
    integer, intent(out) :: it
    real, dimension(1:nsize), intent(inout) :: x, b
    real, intent(in) :: tol
    real, intent(out) :: rezid
    integer, intent(inout) :: not_converge   ! not converge = 1, converge = 0
    real, dimension(:,:), allocatable :: q
    integer :: itaylor

    allocate(q(1:nsize,1:nbDim) )

    not_converge = 0

!    call bMVprod(q(:,3), x(:), nsize)
!    q(:,3) = q(:,3) - b(:)
!    sum = dot_product(q(:,3) , q(:,3) )**0.5
!    print*,'reziduum',0,sum

    ! block multiplication $ q1 = (M+\tau D)^{-1} b $
    call bMVdiagprod(q(:,1), b(:), nsize)

    ! Taylor Series, zero-th step
    x(1:nsize) = q(:,1)

    ! iterative cycle
    do itaylor = 1, 500
       ! block matrix multiplication $ q1 = (M+\tau D)^{-1} E_k q1 $

       call bMVprodOffC(q(:,2), q(:,1), nsize)
       call bMVdiagprod(q(:,1), q(:,2), nsize)

       q(:,1) = - q(:,1)

       x(:) = x(:) + q(:,1)

       !call bMVprod(q(:,3), x(:), nsize)
       !q(:,3) = q(:,3) - b(:)
       !rezid = dot_product(q(:,3) , q(:,3) )**0.5

       rezid = (dot_product(q(:,1) , q(:,1) ) / dot_product(x(:) , x(:) ))**0.5
       !print*,'it rez sum',itaylor,rezid, sum

       if(rezid < tol) goto 100
       not_converge = 1
    enddo

100 continue

    it = itaylor

    deallocate( q )

  end subroutine TaylorSolution


  !> matrix - vector product for conforming FEM
  subroutine prodFEM(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    integer :: i, j

    b(:) = 0.
    do i=1,nsize
       do j= Mshape%irwst(i), Mshape%irwst(i+1)-1
          b(i) = b(i) + state%A(j) * x(Mshape%idx(j))
       enddo
    enddo

  end subroutine prodFEM

  !> diagonal preconditioner for conforming FEM
  subroutine diagFEM(b,x,nsize)
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b
    integer :: i, j

    b(:) = x(:)
    do i=1,nsize
       j= Mshape%irwst(i) ! first element is the diagonal one
       if(state%A(j) /= 0.)   b(i) = b(i) / state%A(j)

       if(Mshape%idx(j) /= i) then
          print*,'NOn-diagonal element in diagFEM !!!'
          stop
       end if
    enddo

  end subroutine diagFEM


  !> block matrix - vector product: \f$ b = (\eta M+ C_k) x \f$,
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).block(*),
  !> \f$ \eta = 1/\tau_k\f$ is external
  subroutine bMVprod_SCHUR(b,x,nsize)
    use matrix_oper_int
    integer, intent (in):: nsize
    real, dimension(1:nsize), intent(in) :: x
    real, dimension(1:nsize), intent(inout) :: b

    b(1:nsize) = matmul(state%Schur_A(1:nsize, 1:nsize), x(1:nsize) )

  end subroutine bMVprod_SCHUR

!  !> copy elem%blockST(0) to blockPlus - used for Ritz reconstruction
!  subroutine CopyBlocksSTtoBlockPlus(  )
!    class( element ), pointer :: elem
!!    integer, intent(in) :: dof ! size of the block
!    integer i,j, dof
!
!    print*, ' CopyBlocksSTtoBlockPlus called'
!
!    do i = 1, grid%nelem
!      elem => grid%elem(i)
!      dof = size( elem%blockST(0)%Mb(:,1) )
!
!      if (.not. allocated(elem%blockPlus) ) then
!         allocate( elem%blockPlus )
!         call InitMblock( elem%blockPlus, dof, dof )
!
!      else if ( size(elem%blockPlus%Mb(:,1) ) /= dof ) then
!         stop 'wrong size of blockPlus in CopyBlocksSTtoBlockPlus'
!      end if
!
!      elem%blockPlus%Mb(1:dof, 1:dof) = elem%blockST(0)%Mb(1:dof, 1:dof)
!    end do !i
!
!  end subroutine CopyBlocksSTtoBlockPlus

  !  ! reduce the size of the blocks blockST to new size
  !  subroutine reduceMatrixBlocks( elem, )

  !> write the global matrix in the block sparse row (BSR) format.
  !> block matrix - vector product: \f$ b = (\theeta M+ C_k) x \f$,
  !> \f$ M  \f$ is the block mass matrix grid.elem(*).Mass multiplied by state%time%refTimeMatrix,
  !> \f$ C  \f$ is the block flux matrix grid.elem(*).blockST(*),
  !> \f$ \theta = 1/\tau_k\f$ is external
  subroutine WriteBlockLinearSTDGMsystem(nsize, theeta, b, x, n_iter, t_iter)
    use matrix_oper_int
    integer, intent (in):: nsize  ! size of the large system
    real, intent (in):: theeta       !  one over time step
    real, dimension(1:nsize), intent(in) :: x     ! initial solution (= 0)
    real, dimension(1:nsize), intent(inout) :: b  ! right-hand side
    integer, intent (in):: n_iter                 ! index of the Newton iteration
    integer, intent (in):: t_iter                 ! index of the time step

    integer :: ifile, ifile1
    class(element), pointer:: elem,elem1 ! one element
    integer :: i, j, k, l, m, n, mm, nn, face, p, pp, r, k1, k2
    integer :: dof, Bdof, dof_tot, ndof, ndof1, Tdof, is, is1
    real, dimension(:),allocatable :: accum
    real, dimension(:,:),allocatable :: Dblock
    integer :: newton_iter_to_write
    integer :: time_step_to_write

    newton_iter_to_write = 1
    time_step_to_write = 1

    ! number of the Newton iteration (n_iter) and the number of the time step (t_iter),
    ! when write the matrix
    print*,'n_iter, t_iter =', n_iter, t_iter + 1
    if(n_iter == newton_iter_to_write .and. t_iter == time_step_to_write - 1) then
       print*,' writing ..'

       ifile = 10
       open(ifile, file = 'BSR_matrix', status = 'unknown')

       ifile1 = 11
       open(ifile1, file = 'BSR_rhs', status = 'unknown')

       ! vector
       do j =  1, nsize
          write(ifile1, *)  b(j), x(j)
       enddo
       close(ifile1)


       ! counting of the number of non-vanishing blocks

       n = 0
       do i=1, grid%nelem
          elem => grid%elem(i)
          n = n+1 ! block diagonal
          do r=1,elem%flen ! off diagonal
             if( elem%face(neigh,r) >0) n = n+1
          enddo
       enddo

       ! expected size of the block
       Bdof = ndim * (state%space%deg+1)*(state%space%deg+2)/2 * (state%time%deg+1)

       write(ifile, *) nsize,      "  #  total size of the matrix"
       write(ifile, *) grid%nelem, "  #  number of the block rows"
       write(ifile, *) n,          "  #  total number of blocks"
       write(ifile, *) Bdof,       "  #  size of the blocks"


       associate( time => state%time )
       select type( time )
       type is ( TimeTDG_t )

          do i=1, grid%nelem
             elem => grid%elem(i)


             ! counting of the number of non-vanishing blocks in the row
             n = 1
             do r=1,elem%flen
                if( elem%face(neigh,r) >0) n = n+1
             enddo

             write(ifile,*) '--------------------'
             write(ifile,'(x)')
             write(ifile,*) 'block_row_index', i
             write(ifile,*) 'number_of_blocks_in_this_row', n
             write(ifile, '(x)')
             write(ifile,*) 'block_column_index', i


             is = elem%ncv

             dof = elem%dof
             Tdof = elem%Tdof
             ndof = dof * ndim

             if(Bdof /= ndof*Tdof) then
                print*,' Troubles with dimensions in WriteBlockLinearSTDGMsystem'
                print*,' probably varying polynomial degrees'
                stop
             endif


             allocate(Dblock(1:ndof*Tdof, 1:ndof*Tdof), source = 0.0 )

             ! terms with the time derivatives
             do m=1,Tdof
                mm = (m-1)* ndof

                do n=1,Tdof
                   nn = (n-1)* ndof

                   do k=1, ndim
                      k1 = (k-1)* dof + 1
                      k2 =  k  *  dof
                      Dblock(mm + k1 : mm+k2,  nn +k1 : nn+k2) =  eta &
                           * time%refTimeMatrix%Mb(m,n) * elem%Mass%Mb(1:dof,1:dof)
                   enddo
                enddo

             enddo

             ! adding of the original diagonal block
             !Dblock(1:Tdof*ndof, 1:Tdof*ndof) =  Dblock(1:Tdof*ndof, 1:Tdof*ndof)  &
             !     + elem%blockST(0)%Mb(1:Tdof*ndof, 1:Tdof*ndof)

             ! file output
             do m = 1, ndof*Tdof
                write(ifile,999) Dblock( m, 1:ndof*Tdof )
             enddo !m

             deallocate(Dblock)



             !offdiagonal blocks
             do r=1,elem%flen
                face = elem%face(neigh,r)
                if(face > 0) then
                   write(ifile, '(x)')
!!!!write(ifile,'(a6,i2,a2)') 'block(',r,'):'
                   write(ifile,*) 'block_column_index', face

                   elem1 => grid%elem(face)
                   ndof1 = elem1%dof * ndim * elem1%Tdof
                   is1 = elem1%ncv

                   !allocate(accum(1:ndof1))
                   do m = 1, ndof*Tdof
                      write(ifile,999) elem%blockST(r)%Mb( m, 1:ndof1 )
                   enddo !m
                endif
             enddo !r

          enddo !i


          close(ifile)

          class default
          stop  'WriteMatrixST_Blocks for STDGM only'
       end select
     end associate

     stop "stoped in  subroutine WriteBlockLinearSTDGMsystem in matrix.f90"

  endif

!999 format(500es12.4)
999 format(500es24.16)


    !print*,'#############E#E##E#E#', state%linSolver%iter
    !if(state%time%recompute_back == 2 .and. state%linSolver%iter >= 1 )stop "3d3ed388"

end subroutine WriteBlockLinearSTDGMsystem



end module matrix_oper
