Skip to content
Snippets Groups Projects
Commit a5baf703 authored by bRong Njam's avatar bRong Njam
Browse files

Add avx2/avx512 implementation of LENGTH

parent d6146907
No related branches found
No related tags found
No related merge requests found
......@@ -5,9 +5,12 @@ go 1.15
require (
github.com/aws/aws-sdk-go v1.37.14
github.com/klauspost/compress v1.11.7
github.com/frankban/quicktest v1.11.3 // indirect
github.com/mmcloughlin/avo v0.0.0-20210120082657-d60cc025fc3c // indirect
github.com/pierrec/lz4 v2.6.0+incompatible
github.com/pilosa/pilosa v1.4.0
github.com/traetox/goaio v0.0.0-20171005222435-46641abceb17
golang.org/x/text v0.3.3
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9 // indirect
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
)
// Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
#include "textflag.h"
// func strLengthAvx2Asm(x []uint32, r []int64)
// Requires: AVX, AVX2, SSE2
//
// Zero-extends the uint32 elements of x into the int64 slots of r, four
// elements per vector operation. Elements beyond the last complete group of
// four (len(x) % 4 of them) are not written. Only the length of x is read;
// the length of r is never consulted.
TEXT ·strLengthAvx2Asm(SB), NOSPLIT, $0-48
	MOVQ x_len+8(FP), BX   // BX = elements still to convert
	MOVQ x_base+0(FP), SI  // SI = read cursor into x
	MOVQ r_base+24(FP), DI // DI = write cursor into r

unrolled:
	// 12x unrolled: 48 elements = 192 input bytes -> 384 output bytes.
	CMPQ      BX, $48
	JL        remainder
	MOVOU     (SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, (DI)
	MOVOU     16(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 32(DI)
	MOVOU     32(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 64(DI)
	MOVOU     48(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 96(DI)
	MOVOU     64(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 128(DI)
	MOVOU     80(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 160(DI)
	MOVOU     96(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 192(DI)
	MOVOU     112(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 224(DI)
	MOVOU     128(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 256(DI)
	MOVOU     144(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 288(DI)
	MOVOU     160(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 320(DI)
	MOVOU     176(SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, 352(DI)
	ADDQ      $192, SI
	ADDQ      $384, DI
	SUBQ      $48, BX
	JMP       unrolled

remainder:
	// One group of four at a time: 16 input bytes -> 32 output bytes.
	CMPQ      BX, $4
	JL        finish
	MOVOU     (SI), X1
	VPMOVZXDQ X1, Y1
	VMOVDQU   Y1, (DI)
	ADDQ      $16, SI
	ADDQ      $32, DI
	SUBQ      $4, BX
	JMP       remainder

finish:
	RET
// Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
package length
// strLengthAvx2Asm is implemented in avx2.s. It zero-extends x[i] into r[i]
// four elements at a time and stops once fewer than four elements of x
// remain, so the last len(x)%4 elements are left unwritten. The assembly
// reads only the length of x and performs no bounds check on r.
func strLengthAvx2Asm(x []uint32, r []int64)
// Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
#include "textflag.h"
// func strLengthAvx512Asm(x []uint32, r []int64)
// Requires: AVX, AVX512F
//
// Zero-extends the uint32 elements of x into the int64 slots of r, eight
// elements per vector operation. Elements beyond the last complete group of
// eight (len(x) % 8 of them) are not written. Only the length of x is read;
// the length of r is never consulted.
TEXT ·strLengthAvx512Asm(SB), NOSPLIT, $0-48
	MOVQ x_base+0(FP), AX
	MOVQ r_base+24(FP), CX
	MOVQ x_len+8(FP), DX

blockloop:
	// 12x unrolled: 96 elements = 384 input bytes -> 768 output bytes.
	// Each step consumes a full 32-byte YMM load and produces a full 64-byte
	// ZMM store, so the source offsets advance by 32 and the destination
	// offsets by 64; together they cover exactly the 0x180/0x300 bytes the
	// pointers are advanced by at the end of the iteration.
	CMPQ      DX, $0x00000060
	JL        tailloop
	VMOVDQU   (AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, (CX)
	VMOVDQU   32(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 64(CX)
	VMOVDQU   64(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 128(CX)
	VMOVDQU   96(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 192(CX)
	VMOVDQU   128(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 256(CX)
	VMOVDQU   160(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 320(CX)
	VMOVDQU   192(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 384(CX)
	VMOVDQU   224(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 448(CX)
	VMOVDQU   256(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 512(CX)
	VMOVDQU   288(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 576(CX)
	VMOVDQU   320(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 640(CX)
	VMOVDQU   352(AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, 704(CX)
	ADDQ      $0x00000180, AX
	ADDQ      $0x00000300, CX
	SUBQ      $0x00000060, DX
	JMP       blockloop

tailloop:
	// One group of eight at a time: 32 input bytes -> 64 output bytes.
	CMPQ      DX, $0x00000008
	JL        done
	VMOVDQU   (AX), Y0
	VPMOVZXDQ Y0, Z0
	VMOVDQU64 Z0, (CX)
	ADDQ      $0x00000020, AX
	ADDQ      $0x00000040, CX
	SUBQ      $0x00000008, DX
	JMP       tailloop

done:
	RET
// Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
package length
// strLengthAvx512Asm is implemented in avx512.s. It is intended to
// zero-extend x[i] into r[i] in groups of eight elements; its tail loop stops
// once fewer than eight elements of x remain, so the last len(x)%8 elements
// are left for the caller. The assembly reads only the length of x and
// performs no bounds check on r.
func strLengthAvx512Asm(x []uint32, r []int64)
package length
import "matrixbase/pkg/container/vector"
import (
"matrixbase/pkg/container/types"
"golang.org/x/sys/cpu"
)
// Implementation hooks assigned once by init; the exported wrappers call
// through them.
var (
// bytesLength is the function BytesLength forwards to.
bytesLength func(*vector.Bytes, []int64) []int64
// strLength is the function StrLength forwards to; init chooses it from
// the CPU features reported by cpu.X86.
strLength func(*types.Bytes, []int64) []int64
)
// init installs the implementations that BytesLength and StrLength dispatch
// to.
//
// bytesLength always uses the portable version. strLength uses the widest
// kernel the running CPU supports: the AVX-512 kernel when AVX-512F is
// available, otherwise the AVX2 kernel when AVX2 is available, otherwise the
// pure Go loop.
func init() {
	bytesLength = bytesLengthPure

	switch {
	case cpu.X86.HasAVX512F:
		// The AVX-512 kernel uses ZMM registers, so it must be gated on
		// AVX-512F; gating it on AVX2 would execute illegal instructions on
		// AVX2-only CPUs and leave the AVX2 branch unreachable.
		strLength = strLengthAvx512
	case cpu.X86.HasAVX2:
		strLength = strLengthAvx2
	default:
		strLength = strLengthPure
	}
}
func BytesLength(xs *vector.Bytes, rs []int64) []int64 {
return bytesLength(xs, rs)
// StrLength forwards xs and rs to the implementation that init stored in
// strLength and returns that implementation's result. The implementations
// visible in this file store int64(xs.Lengths[i]) into rs[i] and return rs.
func StrLength(xs *types.Bytes, rs []int64) []int64 {
return strLength(xs, rs)
}
// strLengthAvx2 stores int64(xs.Lengths[i]) into rs[i] for every element of
// xs.Lengths and returns rs.
//
// Complete groups of four elements are converted by the AVX2 assembly kernel;
// the remaining len(xs.Lengths)%4 elements are converted by a scalar loop.
// rs must hold at least len(xs.Lengths) elements; a shorter rs panics with an
// index-out-of-range error before anything is written.
func strLengthAvx2(xs *types.Bytes, rs []int64) []int64 {
	const lanes = 4 // uint32 elements converted per vector step in the kernel

	lengths := xs.Lengths
	if len(lengths) == 0 {
		return rs
	}

	// The assembly kernel never looks at len(rs), so verify the destination
	// up front; otherwise a short rs would be written past its end before any
	// Go bounds check could fire.
	_ = rs[len(lengths)-1]

	strLengthAvx2Asm(lengths, rs)

	// Scalar pass over the elements the kernel leaves behind.
	for i := len(lengths) - len(lengths)%lanes; i < len(lengths); i++ {
		rs[i] = int64(lengths[i])
	}
	return rs
}
// strLengthAvx512 stores int64(xs.Lengths[i]) into rs[i] for every element of
// xs.Lengths and returns rs.
//
// Groups of eight elements are handed to the AVX-512 assembly kernel; the
// remaining len(xs.Lengths)%8 elements are converted by a scalar loop. rs
// must hold at least len(xs.Lengths) elements; a shorter rs panics with an
// index-out-of-range error before anything is written.
func strLengthAvx512(xs *types.Bytes, rs []int64) []int64 {
	const lanes = 8 // uint32 elements converted per vector step in the kernel

	lengths := xs.Lengths
	if len(lengths) == 0 {
		return rs
	}

	// The assembly kernel never looks at len(rs), so verify the destination
	// up front; otherwise a short rs would be written past its end before any
	// Go bounds check could fire.
	_ = rs[len(lengths)-1]

	strLengthAvx512Asm(lengths, rs)

	// Scalar pass over the elements the kernel leaves behind.
	for i := len(lengths) - len(lengths)%lanes; i < len(lengths); i++ {
		rs[i] = int64(lengths[i])
	}
	return rs
}
func bytesLengthPure(xs *vector.Bytes, rs []int64) []int64 {
for i, n := range xs.Ns {
func strLengthPure(xs *types.Bytes, rs []int64) []int64 {
for i, n := range xs.Lengths {
rs[i] = int64(n)
}
return rs
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment