########################################################################
# ISPACK FORTRAN SUBROUTINE LIBRARY FOR SCIENTIFIC COMPUTING
# Copyright (C) 1998--2016 Keiichi Ishioka <ishioka@gfd-dennou.org>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA.
########################################################################
.text
.globl lvoszg_
.globl _lvoszg_
#-----------------------------------------------------------------------
# lvoszg_ / _lvoszg_
#
# Syntax : GNU as, AT&T operand order (source first, destination last)
# Target : x86-64 with AVX-512F
# ABI    : System V AMD64, Fortran-style (every argument is an address)
#
# In:
#   rdi -> JB : 32-bit integer, number of blocks to process
#   rsi -> AC : four doubles AC1..AC4
#   rdx -> SD : four doubles SD1R..SD4R
#   rcx -> Q  : JB consecutive blocks of 40 doubles (320 bytes each).
#               All accesses use vmovapd, so Q must be 64-byte aligned.
#
# Block layout (byte offset from the block start, 8 doubles per field):
#   X2  at   0   read only
#   Q1  at  64   read and updated
#   Q2  at 128   read and updated
#   G1R at 192   accumulated into
#   G2R at 256   accumulated into
#
# Work done on each block (element-wise on 8 lanes, every line is one
# fused multiply-add; Q1/Q2 on the right-hand side are the values that
# were loaded at the top of the iteration unless marked "new"):
#   Q1new = Q2    * (AC1*X2 + AC2) + Q1
#   Q2new = Q1new * (AC3*X2 + AC4) + Q2
#   G1R  += SD2R*Q1 + SD4R*Q2
#   G2R  += SD1R*Q1 + SD3R*Q2
#
# Out    : nothing in registers; Q is updated in place.
# Clobb  : rax, rdi, r8, zmm0-zmm3, zmm8-zmm15, flags
# Stack  : not used (leaf routine)
#
# A block count of zero or less returns at once without touching Q.
# The loop below is bottom-tested with an exact-equality exit, so it
# must never be entered with an empty range.
#
# NOTE(review): no vzeroupper is issued before ret. Whether one is
# wanted depends on the target CPU and on what the callers execute
# afterwards - confirm before adding it.
#-----------------------------------------------------------------------
lvoszg_:
_lvoszg_:
	movl	(%rdi), %edi		# edi = JB (upper half of rdi cleared)
	testl	%edi, %edi
	jle	2f			# JB <= 0 : nothing to process

	movq	%rcx, %rax		# rax = address of the current block
	imulq	$320, %rdi, %r8		# r8  = JB * 320 bytes
	addq	%rcx, %r8		# r8  = address just past the last block

	# Loop-invariant coefficients, each replicated to all 8 lanes.
	vbroadcastsd	  (%rdx), %zmm0		# zmm0  = SD1R
	vbroadcastsd	 8(%rdx), %zmm1		# zmm1  = SD2R
	vbroadcastsd	16(%rdx), %zmm2		# zmm2  = SD3R
	vbroadcastsd	24(%rdx), %zmm3		# zmm3  = SD4R

	vbroadcastsd	  (%rsi), %zmm8		# zmm8  = AC1
	vbroadcastsd	 8(%rsi), %zmm9		# zmm9  = AC2
	vbroadcastsd	16(%rsi), %zmm10	# zmm10 = AC3
	vbroadcastsd	24(%rsi), %zmm11	# zmm11 = AC4

1:
	vmovapd	   (%rax), %zmm14		# zmm14 = X2
	vmovapd	 64(%rax), %zmm12		# zmm12 = Q1 as loaded
	vmovapd	128(%rax), %zmm13		# zmm13 = Q2 as loaded
	vmovapd	%zmm14, %zmm15			# zmm15 = X2 (second copy)

	vfmadd213pd	%zmm9, %zmm8, %zmm15	# zmm15 = AC1*X2 + AC2
	vfmadd213pd	%zmm12, %zmm13, %zmm15	# zmm15 = Q2*zmm15 + Q1
	vmovapd	%zmm15, 64(%rax)		# store new Q1

	vfmadd213pd	%zmm11, %zmm10, %zmm14	# zmm14 = AC3*X2 + AC4
	vfmadd213pd	%zmm13, %zmm15, %zmm14	# zmm14 = (new Q1)*zmm14 + Q2
	vmovapd	%zmm14, 128(%rax)		# store new Q2

	# The accumulations below use zmm12/zmm13, i.e. Q1 and Q2 as they
	# were before the two stores above.
	vmovapd	192(%rax), %zmm14		# zmm14 = G1R
	vfmadd231pd	%zmm1, %zmm12, %zmm14	# G1R += SD2R*Q1
	vfmadd231pd	%zmm3, %zmm13, %zmm14	# G1R += SD4R*Q2
	vmovapd	%zmm14, 192(%rax)

	vmovapd	256(%rax), %zmm14		# zmm14 = G2R
	vfmadd231pd	%zmm0, %zmm12, %zmm14	# G2R += SD1R*Q1
	vfmadd231pd	%zmm2, %zmm13, %zmm14	# G2R += SD3R*Q2
	vmovapd	%zmm14, 256(%rax)

	addq	$320, %rax			# advance to the next block
	cmpq	%r8, %rax
	jne	1b				# until rax reaches the end address

2:
	ret
