diff --git a/pkg/vectorize/add/add.go b/pkg/vectorize/add/add.go
index 8045c4703aa2fe1f9a4624f76ae17b2335198a0c..d890e0cc6fdcdbb492095a5975a09221045a9e23 100644
--- a/pkg/vectorize/add/add.go
+++ b/pkg/vectorize/add/add.go
@@ -15,766 +15,169 @@
 package add
 
 import (
-	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"math"
-)
-
-var (
-	Int8Add                 func([]int8, []int8, []int8) []int8
-	Int8AddSels             func([]int8, []int8, []int8, []int64) []int8
-	Int8AddScalar           func(int8, []int8, []int8) []int8
-	Int8AddScalarSels       func(int8, []int8, []int8, []int64) []int8
-	Int16Add                func([]int16, []int16, []int16) []int16
-	Int16AddSels            func([]int16, []int16, []int16, []int64) []int16
-	Int16AddScalar          func(int16, []int16, []int16) []int16
-	Int16AddScalarSels      func(int16, []int16, []int16, []int64) []int16
-	Int32Add                func([]int32, []int32, []int32) []int32
-	Int32AddSels            func([]int32, []int32, []int32, []int64) []int32
-	Int32AddScalar          func(int32, []int32, []int32) []int32
-	Int32AddScalarSels      func(int32, []int32, []int32, []int64) []int32
-	Int64Add                func([]int64, []int64, []int64) []int64
-	Int64AddSels            func([]int64, []int64, []int64, []int64) []int64
-	Int64AddScalar          func(int64, []int64, []int64) []int64
-	Int64AddScalarSels      func(int64, []int64, []int64, []int64) []int64
-	Uint8Add                func([]uint8, []uint8, []uint8) []uint8
-	Uint8AddSels            func([]uint8, []uint8, []uint8, []int64) []uint8
-	Uint8AddScalar          func(uint8, []uint8, []uint8) []uint8
-	Uint8AddScalarSels      func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint16Add               func([]uint16, []uint16, []uint16) []uint16
-	Uint16AddSels           func([]uint16, []uint16, []uint16, []int64) []uint16
-	Uint16AddScalar         func(uint16, []uint16, []uint16) []uint16
-	Uint16AddScalarSels     func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint32Add               func([]uint32, []uint32, []uint32) []uint32
-	Uint32AddSels           func([]uint32, []uint32, []uint32, []int64) []uint32
-	Uint32AddScalar         func(uint32, []uint32, []uint32) []uint32
-	Uint32AddScalarSels     func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint64Add               func([]uint64, []uint64, []uint64) []uint64
-	Uint64AddSels           func([]uint64, []uint64, []uint64, []int64) []uint64
-	Uint64AddScalar         func(uint64, []uint64, []uint64) []uint64
-	Uint64AddScalarSels     func(uint64, []uint64, []uint64, []int64) []uint64
-	Float32Add              func([]float32, []float32, []float32) []float32
-	Float32AddSels          func([]float32, []float32, []float32, []int64) []float32
-	Float32AddScalar        func(float32, []float32, []float32) []float32
-	Float32AddScalarSels    func(float32, []float32, []float32, []int64) []float32
-	Float64Add              func([]float64, []float64, []float64) []float64
-	Float64AddSels          func([]float64, []float64, []float64, []int64) []float64
-	Float64AddScalar        func(float64, []float64, []float64) []float64
-	Float64AddScalarSels    func(float64, []float64, []float64, []int64) []float64
-	Decimal64Add            func([]types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64) []types.Decimal64
-	Decimal64AddSels        func([]types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64, []int64) []types.Decimal64
-	Decimal64AddScalar      func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64) []types.Decimal64
-	Decimal64AddScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64, []int64) []types.Decimal64
-	Decimal128Add           func([]types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128AddSels       func([]types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128AddScalar     func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128AddScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
 
-	Int32Int64Add               func([]int32, []int64, []int64) []int64
-	Int32Int64AddScalar         func(int32, []int64, []int64) []int64
-	Int32Int64AddSels           func([]int32, []int64, []int64, []int64) []int64
-	Int32Int64AddScalarSels     func(int32, []int64, []int64, []int64) []int64
-	Int16Int64Add               func([]int16, []int64, []int64) []int64
-	Int16Int64AddScalar         func(int16, []int64, []int64) []int64
-	Int16Int64AddSels           func([]int16, []int64, []int64, []int64) []int64
-	Int16Int64AddScalarSels     func(int16, []int64, []int64, []int64) []int64
-	Int8Int64Add                func([]int8, []int64, []int64) []int64
-	Int8Int64AddScalar          func(int8, []int64, []int64) []int64
-	Int8Int64AddSels            func([]int8, []int64, []int64, []int64) []int64
-	Int8Int64AddScalarSels      func(int8, []int64, []int64, []int64) []int64
-	Int16Int32Add               func([]int16, []int32, []int32) []int32
-	Int16Int32AddScalar         func(int16, []int32, []int32) []int32
-	Int16Int32AddSels           func([]int16, []int32, []int32, []int64) []int32
-	Int16Int32AddScalarSels     func(int16, []int32, []int32, []int64) []int32
-	Int8Int32Add                func([]int8, []int32, []int32) []int32
-	Int8Int32AddScalar          func(int8, []int32, []int32) []int32
-	Int8Int32AddSels            func([]int8, []int32, []int32, []int64) []int32
-	Int8Int32AddScalarSels      func(int8, []int32, []int32, []int64) []int32
-	Int8Int16Add                func([]int8, []int16, []int16) []int16
-	Int8Int16AddScalar          func(int8, []int16, []int16) []int16
-	Int8Int16AddSels            func([]int8, []int16, []int16, []int64) []int16
-	Int8Int16AddScalarSels      func(int8, []int16, []int16, []int64) []int16
-	Float32Float64Add           func([]float32, []float64, []float64) []float64
-	Float32Float64AddScalar     func(float32, []float64, []float64) []float64
-	Float32Float64AddSels       func([]float32, []float64, []float64, []int64) []float64
-	Float32Float64AddScalarSels func(float32, []float64, []float64, []int64) []float64
-	Uint32Uint64Add             func([]uint32, []uint64, []uint64) []uint64
-	Uint32Uint64AddScalar       func(uint32, []uint64, []uint64) []uint64
-	Uint32Uint64AddSels         func([]uint32, []uint64, []uint64, []int64) []uint64
-	Uint32Uint64AddScalarSels   func(uint32, []uint64, []uint64, []int64) []uint64
-	Uint16Uint64Add             func([]uint16, []uint64, []uint64) []uint64
-	Uint16Uint64AddScalar       func(uint16, []uint64, []uint64) []uint64
-	Uint16Uint64AddSels         func([]uint16, []uint64, []uint64, []int64) []uint64
-	Uint16Uint64AddScalarSels   func(uint16, []uint64, []uint64, []int64) []uint64
-	Uint8Uint64Add              func([]uint8, []uint64, []uint64) []uint64
-	Uint8Uint64AddScalar        func(uint8, []uint64, []uint64) []uint64
-	Uint8Uint64AddSels          func([]uint8, []uint64, []uint64, []int64) []uint64
-	Uint8Uint64AddScalarSels    func(uint8, []uint64, []uint64, []int64) []uint64
-	Uint16Uint32Add             func([]uint16, []uint32, []uint32) []uint32
-	Uint16Uint32AddScalar       func(uint16, []uint32, []uint32) []uint32
-	Uint16Uint32AddSels         func([]uint16, []uint32, []uint32, []int64) []uint32
-	Uint16Uint32AddScalarSels   func(uint16, []uint32, []uint32, []int64) []uint32
-	Uint8Uint32Add              func([]uint8, []uint32, []uint32) []uint32
-	Uint8Uint32AddScalar        func(uint8, []uint32, []uint32) []uint32
-	Uint8Uint32AddSels          func([]uint8, []uint32, []uint32, []int64) []uint32
-	Uint8Uint32AddScalarSels    func(uint8, []uint32, []uint32, []int64) []uint32
-	Uint8Uint16Add              func([]uint8, []uint16, []uint16) []uint16
-	Uint8Uint16AddScalar        func(uint8, []uint16, []uint16) []uint16
-	Uint8Uint16AddSels          func([]uint8, []uint16, []uint16, []int64) []uint16
-	Uint8Uint16AddScalarSels    func(uint8, []uint16, []uint16, []int64) []uint16
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
 )
 
-func init() {
-
-	Decimal64Add = decimal64Add
-	Decimal64AddSels = decimal64AddSels
-	Decimal64AddScalar = decimal64AddScalar
-	Decimal64AddScalarSels = decimal64AddScalarSels
-	Decimal128Add = decimal128Add
-	Decimal128AddSels = decimal128AddSels
-	Decimal128AddScalar = decimal128AddScalar
+var (
+	Int8Add              = numericAdd[int8]
+	Int8AddScalar        = numericAddScalar[int8]
+	Int16Add             = numericAdd[int16]
+	Int16AddScalar       = numericAddScalar[int16]
+	Int32Add             = numericAdd[int32]
+	Int32AddScalar       = numericAddScalar[int32]
+	Int64Add             = numericAdd[int64]
+	Int64AddScalar       = numericAddScalar[int64]
+	Uint8Add             = numericAdd[uint8]
+	Uint8AddScalar       = numericAddScalar[uint8]
+	Uint16Add            = numericAdd[uint16]
+	Uint16AddScalar      = numericAddScalar[uint16]
+	Uint32Add            = numericAdd[uint32]
+	Uint32AddScalar      = numericAddScalar[uint32]
+	Uint64Add            = numericAdd[uint64]
+	Uint64AddScalar      = numericAddScalar[uint64]
+	Float32Add           = numericAdd[float32]
+	Float32AddScalar     = numericAddScalar[float32]
+	Float64Add           = numericAdd[float64]
+	Float64AddScalar     = numericAddScalar[float64]
+	Int8AddSels          = numericAddSels[int8]
+	Int8AddScalarSels    = numericAddScalarSels[int8]
+	Int16AddSels         = numericAddSels[int16]
+	Int16AddScalarSels   = numericAddScalarSels[int16]
+	Int32AddSels         = numericAddSels[int32]
+	Int32AddScalarSels   = numericAddScalarSels[int32]
+	Int64AddSels         = numericAddSels[int64]
+	Int64AddScalarSels   = numericAddScalarSels[int64]
+	Uint8AddSels         = numericAddSels[uint8]
+	Uint8AddScalarSels   = numericAddScalarSels[uint8]
+	Uint16AddSels        = numericAddSels[uint16]
+	Uint16AddScalarSels  = numericAddScalarSels[uint16]
+	Uint32AddSels        = numericAddSels[uint32]
+	Uint32AddScalarSels  = numericAddScalarSels[uint32]
+	Uint64AddSels        = numericAddSels[uint64]
+	Uint64AddScalarSels  = numericAddScalarSels[uint64]
+	Float32AddSels       = numericAddSels[float32]
+	Float32AddScalarSels = numericAddScalarSels[float32]
+	Float64AddSels       = numericAddSels[float64]
+	Float64AddScalarSels = numericAddScalarSels[float64]
+
+	Int32Int64Add               = numericAdd2[int32, int64]
+	Int32Int64AddScalar         = numericAddScalar2[int32, int64]
+	Int32Int64AddSels           = numericAddSels2[int32, int64]
+	Int32Int64AddScalarSels     = numericAddScalarSels2[int32, int64]
+	Int16Int64Add               = numericAdd2[int16, int64]
+	Int16Int64AddScalar         = numericAddScalar2[int16, int64]
+	Int16Int64AddSels           = numericAddSels2[int16, int64]
+	Int16Int64AddScalarSels     = numericAddScalarSels2[int16, int64]
+	Int8Int64Add                = numericAdd2[int8, int64]
+	Int8Int64AddScalar          = numericAddScalar2[int8, int64]
+	Int8Int64AddSels            = numericAddSels2[int8, int64]
+	Int8Int64AddScalarSels      = numericAddScalarSels2[int8, int64]
+	Int16Int32Add               = numericAdd2[int16, int32]
+	Int16Int32AddScalar         = numericAddScalar2[int16, int32]
+	Int16Int32AddSels           = numericAddSels2[int16, int32]
+	Int16Int32AddScalarSels     = numericAddScalarSels2[int16, int32]
+	Int8Int32Add                = numericAdd2[int8, int32]
+	Int8Int32AddScalar          = numericAddScalar2[int8, int32]
+	Int8Int32AddSels            = numericAddSels2[int8, int32]
+	Int8Int32AddScalarSels      = numericAddScalarSels2[int8, int32]
+	Int8Int16Add                = numericAdd2[int8, int16]
+	Int8Int16AddScalar          = numericAddScalar2[int8, int16]
+	Int8Int16AddSels            = numericAddSels2[int8, int16]
+	Int8Int16AddScalarSels      = numericAddScalarSels2[int8, int16]
+	Float32Float64Add           = numericAdd2[float32, float64]
+	Float32Float64AddScalar     = numericAddScalar2[float32, float64]
+	Float32Float64AddSels       = numericAddSels2[float32, float64]
+	Float32Float64AddScalarSels = numericAddScalarSels2[float32, float64]
+	Uint32Uint64Add             = numericAdd2[uint32, uint64]
+	Uint32Uint64AddScalar       = numericAddScalar2[uint32, uint64]
+	Uint32Uint64AddSels         = numericAddSels2[uint32, uint64]
+	Uint32Uint64AddScalarSels   = numericAddScalarSels2[uint32, uint64]
+	Uint16Uint64Add             = numericAdd2[uint16, uint64]
+	Uint16Uint64AddScalar       = numericAddScalar2[uint16, uint64]
+	Uint16Uint64AddSels         = numericAddSels2[uint16, uint64]
+	Uint16Uint64AddScalarSels   = numericAddScalarSels2[uint16, uint64]
+	Uint8Uint64Add              = numericAdd2[uint8, uint64]
+	Uint8Uint64AddScalar        = numericAddScalar2[uint8, uint64]
+	Uint8Uint64AddSels          = numericAddSels2[uint8, uint64]
+	Uint8Uint64AddScalarSels    = numericAddScalarSels2[uint8, uint64]
+	Uint16Uint32Add             = numericAdd2[uint16, uint32]
+	Uint16Uint32AddScalar       = numericAddScalar2[uint16, uint32]
+	Uint16Uint32AddSels         = numericAddSels2[uint16, uint32]
+	Uint16Uint32AddScalarSels   = numericAddScalarSels2[uint16, uint32]
+	Uint8Uint32Add              = numericAdd2[uint8, uint32]
+	Uint8Uint32AddScalar        = numericAddScalar2[uint8, uint32]
+	Uint8Uint32AddSels          = numericAddSels2[uint8, uint32]
+	Uint8Uint32AddScalarSels    = numericAddScalarSels2[uint8, uint32]
+	Uint8Uint16Add              = numericAdd2[uint8, uint16]
+	Uint8Uint16AddScalar        = numericAddScalar2[uint8, uint16]
+	Uint8Uint16AddSels          = numericAddSels2[uint8, uint16]
+	Uint8Uint16AddScalarSels    = numericAddScalarSels2[uint8, uint16]
+
+	Decimal64Add            = decimal64Add
+	Decimal64AddSels        = decimal64AddSels
+	Decimal64AddScalar      = decimal64AddScalar
+	Decimal64AddScalarSels  = decimal64AddScalarSels
+	Decimal128Add           = decimal128Add
+	Decimal128AddSels       = decimal128AddSels
+	Decimal128AddScalar     = decimal128AddScalar
 	Decimal128AddScalarSels = decimal128AddScalarSels
-}
-
-func int8Add(xs, ys, rs []int8) []int8 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func int8AddSels(xs, ys, rs []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func int8AddScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func int8AddScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func int16Add(xs, ys, rs []int16) []int16 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func int16AddSels(xs, ys, rs []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func int16AddScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func int16AddScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func int32Add(xs, ys, rs []int32) []int32 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func int32AddSels(xs, ys, rs []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func int32AddScalar(x int32, ys, rs []int32) []int32 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func int32AddScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func int64Add(xs, ys, rs []int64) []int64 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func int64AddSels(xs, ys, rs []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func int64AddScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func int64AddScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func uint8Add(xs, ys, rs []uint8) []uint8 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func uint8AddSels(xs, ys, rs []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func uint8AddScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func uint8AddScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func uint16Add(xs, ys, rs []uint16) []uint16 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func uint16AddSels(xs, ys, rs []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func uint16AddScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func uint16AddScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func uint32Add(xs, ys, rs []uint32) []uint32 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func uint32AddSels(xs, ys, rs []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func uint32AddScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func uint32AddScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func uint64Add(xs, ys, rs []uint64) []uint64 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
-	}
-	return rs
-}
-
-func uint64AddSels(xs, ys, rs []uint64, sels []int64) []uint64 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
-	}
-	return rs
-}
-
-func uint64AddScalar(x uint64, ys, rs []uint64) []uint64 {
-	for i, y := range ys {
-		rs[i] = x + y
-	}
-	return rs
-}
-
-func uint64AddScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
+)
 
-func float32Add(xs, ys, rs []float32) []float32 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
+func numericAdd[T constraints.Integer | constraints.Float](xs, ys, rs []T) []T {
+	for k := range xs {
+		rs[k] = xs[k] + ys[k]
 	}
 	return rs
 }
 
-func float32AddSels(xs, ys, rs []float32, sels []int64) []float32 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
+func numericAddSels[T constraints.Integer | constraints.Float](xs, ys, rs []T, sels []int64) []T {
+	for pos, idx := range sels {
+		rs[pos] = xs[idx] + ys[idx]
 	}
 	return rs
 }
 
-func float32AddScalar(x float32, ys, rs []float32) []float32 {
-	for i, y := range ys {
-		rs[i] = x + y
+func numericAddScalar[T constraints.Integer | constraints.Float](x T, ys, rs []T) []T {
+	for k := range ys {
+		rs[k] = x + ys[k]
 	}
 	return rs
 }
 
-func float32AddScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
-	for i, sel := range sels {
-		rs[i] = x + ys[sel]
+func numericAddScalarSels[T constraints.Integer | constraints.Float](x T, ys, rs []T, sels []int64) []T {
+	for pos, idx := range sels {
+		rs[pos] = x + ys[idx]
 	}
 	return rs
 }
 
-func float64Add(xs, ys, rs []float64) []float64 {
-	for i, x := range xs {
-		rs[i] = x + ys[i]
+func numericAdd2[TSmall, TBig constraints.Integer | constraints.Float](xs []TSmall, ys, rs []TBig) []TBig {
+	for k := range xs {
+		rs[k] = TBig(xs[k]) + ys[k]
 	}
 	return rs
 }
 
-func float64AddSels(xs, ys, rs []float64, sels []int64) []float64 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] + ys[sel]
+func numericAddSels2[TSmall, TBig constraints.Integer | constraints.Float](xs []TSmall, ys, rs []TBig, sels []int64) []TBig {
+	for pos, idx := range sels {
+		rs[pos] = TBig(xs[idx]) + ys[idx]
 	}
 	return rs
 }
 
-func float64AddScalar(x float64, ys, rs []float64) []float64 {
-	for i, y := range ys {
-		rs[i] = x + y
+func numericAddScalar2[TSmall, TBig constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig) []TBig {
+	for k := range ys {
+		rs[k] = TBig(x) + ys[k]
 	}
 	return rs
 }
 
-func float64AddScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func numericAddScalarSels2[TSmall, TBig constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = x + ys[sel]
-	}
-	return rs
-}
-
-func int32Int64Add(xs []int32, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = int64(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func int32Int64AddScalar(x int32, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = int64(x) + ys[i]
-	}
-	return rs
-}
-
-func int32Int64AddSels(xs []int32, ys, rs, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func int32Int64AddScalarSels(x int32, ys, rs, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(x) + ys[sel]
-	}
-	return rs
-}
-
-func int16Int64Add(xs []int16, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = int64(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func int16Int64AddScalar(x int16, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = int64(x) + ys[i]
-	}
-	return rs
-}
-
-func int16Int64AddSels(xs []int16, ys, rs, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func int16Int64AddScalarSels(x int16, ys, rs, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(x) + ys[sel]
-	}
-	return rs
-}
-
-func int8Int64Add(xs []int8, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = int64(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func int8Int64AddScalar(x int8, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = int64(x) + ys[i]
-	}
-	return rs
-}
-
-func int8Int64AddSels(xs []int8, ys, rs, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func int8Int64AddScalarSels(x int8, ys, rs, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(x) + ys[sel]
-	}
-	return rs
-}
-
-func int16Int32Add(xs []int16, ys, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = int32(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func int16Int32AddScalar(x int16, ys, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = int32(x) + ys[i]
-	}
-	return rs
-}
-
-func int16Int32AddSels(xs []int16, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = int32(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func int16Int32AddScalarSels(x int16, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = int32(x) + ys[sel]
-	}
-	return rs
-}
-
-func int8Int32Add(xs []int8, ys, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = int32(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func int8Int32AddScalar(x int8, ys, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = int32(x) + ys[i]
-	}
-	return rs
-}
-
-func int8Int32AddSels(xs []int8, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = int32(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func int8Int32AddScalarSels(x int8, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = int32(x) + ys[sel]
-	}
-	return rs
-}
-
-func int8Int16Add(xs []int8, ys, rs []int16) []int16 {
-	for i := range rs {
-		rs[i] = int16(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func int8Int16AddScalar(x int8, ys, rs []int16) []int16 {
-	for i := range rs {
-		rs[i] = int16(x) + ys[i]
-	}
-	return rs
-}
-
-func int8Int16AddSels(xs []int8, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = int16(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func int8Int16AddScalarSels(x int8, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = int16(x) + ys[sel]
-	}
-	return rs
-}
-
-func float32Float64Add(xs []float32, ys, rs []float64) []float64 {
-	for i := range rs {
-		rs[i] = float64(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func float32Float64AddScalar(x float32, ys, rs []float64) []float64 {
-	for i := range rs {
-		rs[i] = float64(x) + ys[i]
-	}
-	return rs
-}
-
-func float32Float64AddSels(xs []float32, ys, rs []float64, sels []int64) []float64 {
-	for _, sel := range sels {
-		rs[sel] = float64(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func float32Float64AddScalarSels(x float32, ys, rs []float64, sels []int64) []float64 {
-	for _, sel := range sels {
-		rs[sel] = float64(x) + ys[sel]
-	}
-	return rs
-}
-
-func uint32Uint64Add(xs []uint32, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = uint64(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func uint32Uint64AddScalar(x uint32, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = uint64(x) + ys[i]
-	}
-	return rs
-}
-
-func uint32Uint64AddSels(xs []uint32, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = uint64(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func uint32Uint64AddScalarSels(x uint32, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = uint64(x) + ys[sel]
-	}
-	return rs
-}
-
-func uint16Uint64Add(xs []uint16, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = uint64(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func uint16Uint64AddScalar(x uint16, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = uint64(x) + ys[i]
-	}
-	return rs
-}
-
-func uint16Uint64AddSels(xs []uint16, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = uint64(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func uint16Uint64AddScalarSels(x uint16, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = uint64(x) + ys[sel]
-	}
-	return rs
-}
-
-func uint8Uint64Add(xs []uint8, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = uint64(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func uint8Uint64AddScalar(x uint8, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = uint64(x) + ys[i]
-	}
-	return rs
-}
-
-func uint8Uint64AddSels(xs []uint8, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = uint64(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func uint8Uint64AddScalarSels(x uint8, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = uint64(x) + ys[sel]
-	}
-	return rs
-}
-
-func uint16Uint32Add(xs []uint16, ys, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = uint32(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func uint16Uint32AddScalar(x uint16, ys, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = uint32(x) + ys[i]
-	}
-	return rs
-}
-
-func uint16Uint32AddSels(xs []uint16, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = uint32(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func uint16Uint32AddScalarSels(x uint16, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = uint32(x) + ys[sel]
-	}
-	return rs
-}
-
-func uint8Uint32Add(xs []uint8, ys, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = uint32(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func uint8Uint32AddScalar(x uint8, ys, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = uint32(x) + ys[i]
-	}
-	return rs
-}
-
-func uint8Uint32AddSels(xs []uint8, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = uint32(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func uint8Uint32AddScalarSels(x uint8, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = uint32(x) + ys[sel]
-	}
-	return rs
-}
-
-func uint8Uint16Add(xs []uint8, ys, rs []uint16) []uint16 {
-	for i := range rs {
-		rs[i] = uint16(xs[i]) + ys[i]
-	}
-	return rs
-}
-
-func uint8Uint16AddScalar(x uint8, ys, rs []uint16) []uint16 {
-	for i := range rs {
-		rs[i] = uint16(x) + ys[i]
-	}
-	return rs
-}
-
-func uint8Uint16AddSels(xs []uint8, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = uint16(xs[sel]) + ys[sel]
-	}
-	return rs
-}
-
-func uint8Uint16AddScalarSels(x uint8, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = uint16(x) + ys[sel]
+		rs[i] = TBig(x) + ys[sel]
 	}
 	return rs
 }
@@ -837,10 +240,9 @@ func decimal64AddScalar(x types.Decimal64, ys []types.Decimal64, xScale, ysScale
 		}
 		return rs
 	} else if xScale < ysScale {
-		xScaled := x
 		scaleDiff := ysScale - xScale
 		scale := int64(math.Pow10(int(scaleDiff)))
-		xScaled = types.ScaleDecimal64(x, scale)
+		xScaled := types.ScaleDecimal64(x, scale)
 		for i, y := range ys {
 			rs[i] = types.Decimal64AddAligned(xScaled, y)
 		}
diff --git a/pkg/vectorize/add/add_amd64.go b/pkg/vectorize/add/add_amd64.go
index 6a08529a8842397763dc6f1d0c353963a60a51b5..2ef3b0e1af95a8c92c1201e53c6a088b0447d5aa 100644
--- a/pkg/vectorize/add/add_amd64.go
+++ b/pkg/vectorize/add/add_amd64.go
@@ -101,102 +101,7 @@ func init() {
 		Float32AddScalar = float32AddScalarAvx2
 		Float64Add = float64AddAvx2
 		Float64AddScalar = float64AddScalarAvx2
-	} else {
-		Int8Add = int8Add
-		Int8AddScalar = int8AddScalar
-		Int16Add = int16Add
-		Int16AddScalar = int16AddScalar
-		Int32Add = int32Add
-		Int32AddScalar = int32AddScalar
-		Int64Add = int64Add
-		Int64AddScalar = int64AddScalar
-		Uint8Add = uint8Add
-		Uint8AddScalar = uint8AddScalar
-		Uint16Add = uint16Add
-		Uint16AddScalar = uint16AddScalar
-		Uint32Add = uint32Add
-		Uint32AddScalar = uint32AddScalar
-		Uint64Add = uint64Add
-		Uint64AddScalar = uint64AddScalar
-		Float32Add = float32Add
-		Float32AddScalar = float32AddScalar
-		Float64Add = float64Add
-		Float64AddScalar = float64AddScalar
-	}
-
-	Int8AddSels = int8AddSels
-	Int8AddScalarSels = int8AddScalarSels
-	Int16AddSels = int16AddSels
-	Int16AddScalarSels = int16AddScalarSels
-	Int32AddSels = int32AddSels
-	Int32AddScalarSels = int32AddScalarSels
-	Int64AddSels = int64AddSels
-	Int64AddScalarSels = int64AddScalarSels
-	Uint8AddSels = uint8AddSels
-	Uint8AddScalarSels = uint8AddScalarSels
-	Uint16AddSels = uint16AddSels
-	Uint16AddScalarSels = uint16AddScalarSels
-	Uint32AddSels = uint32AddSels
-	Uint32AddScalarSels = uint32AddScalarSels
-	Uint64AddSels = uint64AddSels
-	Uint64AddScalarSels = uint64AddScalarSels
-	Float32AddSels = float32AddSels
-	Float32AddScalarSels = float32AddScalarSels
-	Float64AddSels = float64AddSels
-	Float64AddScalarSels = float64AddScalarSels
-
-	Int32Int64Add = int32Int64Add
-	Int32Int64AddScalar = int32Int64AddScalar
-	Int32Int64AddSels = int32Int64AddSels
-	Int32Int64AddScalarSels = int32Int64AddScalarSels
-	Int16Int64Add = int16Int64Add
-	Int16Int64AddScalar = int16Int64AddScalar
-	Int16Int64AddSels = int16Int64AddSels
-	Int16Int64AddScalarSels = int16Int64AddScalarSels
-	Int8Int64Add = int8Int64Add
-	Int8Int64AddScalar = int8Int64AddScalar
-	Int8Int64AddSels = int8Int64AddSels
-	Int8Int64AddScalarSels = int8Int64AddScalarSels
-	Int16Int32Add = int16Int32Add
-	Int16Int32AddScalar = int16Int32AddScalar
-	Int16Int32AddSels = int16Int32AddSels
-	Int16Int32AddScalarSels = int16Int32AddScalarSels
-	Int8Int32Add = int8Int32Add
-	Int8Int32AddScalar = int8Int32AddScalar
-	Int8Int32AddSels = int8Int32AddSels
-	Int8Int32AddScalarSels = int8Int32AddScalarSels
-	Int8Int16Add = int8Int16Add
-	Int8Int16AddScalar = int8Int16AddScalar
-	Int8Int16AddSels = int8Int16AddSels
-	Int8Int16AddScalarSels = int8Int16AddScalarSels
-	Float32Float64Add = float32Float64Add
-	Float32Float64AddScalar = float32Float64AddScalar
-	Float32Float64AddSels = float32Float64AddSels
-	Float32Float64AddScalarSels = float32Float64AddScalarSels
-	Uint32Uint64Add = uint32Uint64Add
-	Uint32Uint64AddScalar = uint32Uint64AddScalar
-	Uint32Uint64AddSels = uint32Uint64AddSels
-	Uint32Uint64AddScalarSels = uint32Uint64AddScalarSels
-	Uint16Uint64Add = uint16Uint64Add
-	Uint16Uint64AddScalar = uint16Uint64AddScalar
-	Uint16Uint64AddSels = uint16Uint64AddSels
-	Uint16Uint64AddScalarSels = uint16Uint64AddScalarSels
-	Uint8Uint64Add = uint8Uint64Add
-	Uint8Uint64AddScalar = uint8Uint64AddScalar
-	Uint8Uint64AddSels = uint8Uint64AddSels
-	Uint8Uint64AddScalarSels = uint8Uint64AddScalarSels
-	Uint16Uint32Add = uint16Uint32Add
-	Uint16Uint32AddScalar = uint16Uint32AddScalar
-	Uint16Uint32AddSels = uint16Uint32AddSels
-	Uint16Uint32AddScalarSels = uint16Uint32AddScalarSels
-	Uint8Uint32Add = uint8Uint32Add
-	Uint8Uint32AddScalar = uint8Uint32AddScalar
-	Uint8Uint32AddSels = uint8Uint32AddSels
-	Uint8Uint32AddScalarSels = uint8Uint32AddScalarSels
-	Uint8Uint16Add = uint8Uint16Add
-	Uint8Uint16AddScalar = uint8Uint16AddScalar
-	Uint8Uint16AddSels = uint8Uint16AddSels
-	Uint8Uint16AddScalarSels = uint8Uint16AddScalarSels
+	}
 }
 
 func int8AddAvx2(xs, ys, rs []int8) []int8 {
diff --git a/pkg/vectorize/add/add_arm64.go b/pkg/vectorize/add/add_arm64.go
deleted file mode 100644
index 8e67ca1e2ebba8cbe43fac1c8faf6b5fe97d27be..0000000000000000000000000000000000000000
--- a/pkg/vectorize/add/add_arm64.go
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright 2021 Matrix Origin
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package add
-
-func init() {
-	Int8Add = int8Add
-	Int8AddScalar = int8AddScalar
-	Int16Add = int16Add
-	Int16AddScalar = int16AddScalar
-	Int32Add = int32Add
-	Int32AddScalar = int32AddScalar
-	Int64Add = int64Add
-	Int64AddScalar = int64AddScalar
-	Uint8Add = uint8Add
-	Uint8AddScalar = uint8AddScalar
-	Uint16Add = uint16Add
-	Uint16AddScalar = uint16AddScalar
-	Uint32Add = uint32Add
-	Uint32AddScalar = uint32AddScalar
-	Uint64Add = uint64Add
-	Uint64AddScalar = uint64AddScalar
-	Float32Add = float32Add
-	Float32AddScalar = float32AddScalar
-	Float64Add = float64Add
-	Float64AddScalar = float64AddScalar
-	Int8AddSels = int8AddSels
-	Int8AddScalarSels = int8AddScalarSels
-	Int16AddSels = int16AddSels
-	Int16AddScalarSels = int16AddScalarSels
-	Int32AddSels = int32AddSels
-	Int32AddScalarSels = int32AddScalarSels
-	Int64AddSels = int64AddSels
-	Int64AddScalarSels = int64AddScalarSels
-	Uint8AddSels = uint8AddSels
-	Uint8AddScalarSels = uint8AddScalarSels
-	Uint16AddSels = uint16AddSels
-	Uint16AddScalarSels = uint16AddScalarSels
-	Uint32AddSels = uint32AddSels
-	Uint32AddScalarSels = uint32AddScalarSels
-	Uint64AddSels = uint64AddSels
-	Uint64AddScalarSels = uint64AddScalarSels
-	Float32AddSels = float32AddSels
-	Float32AddScalarSels = float32AddScalarSels
-	Float64AddSels = float64AddSels
-	Float64AddScalarSels = float64AddScalarSels
-
-	Int32Int64Add = int32Int64Add
-	Int32Int64AddScalar = int32Int64AddScalar
-	Int32Int64AddSels = int32Int64AddSels
-	Int32Int64AddScalarSels = int32Int64AddScalarSels
-	Int16Int64Add = int16Int64Add
-	Int16Int64AddScalar = int16Int64AddScalar
-	Int16Int64AddSels = int16Int64AddSels
-	Int16Int64AddScalarSels = int16Int64AddScalarSels
-	Int8Int64Add = int8Int64Add
-	Int8Int64AddScalar = int8Int64AddScalar
-	Int8Int64AddSels = int8Int64AddSels
-	Int8Int64AddScalarSels = int8Int64AddScalarSels
-	Int16Int32Add = int16Int32Add
-	Int16Int32AddScalar = int16Int32AddScalar
-	Int16Int32AddSels = int16Int32AddSels
-	Int16Int32AddScalarSels = int16Int32AddScalarSels
-	Int8Int32Add = int8Int32Add
-	Int8Int32AddScalar = int8Int32AddScalar
-	Int8Int32AddSels = int8Int32AddSels
-	Int8Int32AddScalarSels = int8Int32AddScalarSels
-	Int8Int16Add = int8Int16Add
-	Int8Int16AddScalar = int8Int16AddScalar
-	Int8Int16AddSels = int8Int16AddSels
-	Int8Int16AddScalarSels = int8Int16AddScalarSels
-	Float32Float64Add = float32Float64Add
-	Float32Float64AddScalar = float32Float64AddScalar
-	Float32Float64AddSels = float32Float64AddSels
-	Float32Float64AddScalarSels = float32Float64AddScalarSels
-	Uint32Uint64Add = uint32Uint64Add
-	Uint32Uint64AddScalar = uint32Uint64AddScalar
-	Uint32Uint64AddSels = uint32Uint64AddSels
-	Uint32Uint64AddScalarSels = uint32Uint64AddScalarSels
-	Uint16Uint64Add = uint16Uint64Add
-	Uint16Uint64AddScalar = uint16Uint64AddScalar
-	Uint16Uint64AddSels = uint16Uint64AddSels
-	Uint16Uint64AddScalarSels = uint16Uint64AddScalarSels
-	Uint8Uint64Add = uint8Uint64Add
-	Uint8Uint64AddScalar = uint8Uint64AddScalar
-	Uint8Uint64AddSels = uint8Uint64AddSels
-	Uint8Uint64AddScalarSels = uint8Uint64AddScalarSels
-	Uint16Uint32Add = uint16Uint32Add
-	Uint16Uint32AddScalar = uint16Uint32AddScalar
-	Uint16Uint32AddSels = uint16Uint32AddSels
-	Uint16Uint32AddScalarSels = uint16Uint32AddScalarSels
-	Uint8Uint32Add = uint8Uint32Add
-	Uint8Uint32AddScalar = uint8Uint32AddScalar
-	Uint8Uint32AddSels = uint8Uint32AddSels
-	Uint8Uint32AddScalarSels = uint8Uint32AddScalarSels
-	Uint8Uint16Add = uint8Uint16Add
-	Uint8Uint16AddScalar = uint8Uint16AddScalar
-	Uint8Uint16AddSels = uint8Uint16AddSels
-	Uint8Uint16AddScalarSels = uint8Uint16AddScalarSels
-}
diff --git a/pkg/vectorize/add/avx2.s b/pkg/vectorize/add/add_avx2_amd64.s
similarity index 99%
rename from pkg/vectorize/add/avx2.s
rename to pkg/vectorize/add/add_avx2_amd64.s
index 91edc4f08d41db577faf93a24edaee8cea9be4f5..1be5a559c99de3ed967b6b1c536f8e5f8176bca1 100644
--- a/pkg/vectorize/add/avx2.s
+++ b/pkg/vectorize/add/add_avx2_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/add/avx512.s b/pkg/vectorize/add/add_avx512_amd64.s
similarity index 99%
rename from pkg/vectorize/add/avx512.s
rename to pkg/vectorize/add/add_avx512_amd64.s
index 7b281d0e793fb9cac800dfe3f4a9e804295ea4f9..b3a50045b59ff38feec421759fac75e6e17e43d7 100644
--- a/pkg/vectorize/add/avx512.s
+++ b/pkg/vectorize/add/add_avx512_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/add/add_test.go b/pkg/vectorize/add/add_test.go
index cd888ecb3654bf1e8533549fa3ef6213daab79c0..434da5b4cd47a61b7ff3872a499d1aab5615d936 100644
--- a/pkg/vectorize/add/add_test.go
+++ b/pkg/vectorize/add/add_test.go
@@ -39,12 +39,12 @@ func TestF64Add(t *testing.T) {
 	xs := makeFbuffer(13)
 	res := make([]float64, 13)
 	fmt.Printf("sum:\n\t%v\n", Float64Add(xs, xs, res))
-	fmt.Printf("pure sum:\n\t%v\n", float64Add(xs, xs, res))
+	fmt.Printf("pure sum:\n\t%v\n", numericAdd(xs, xs, res))
 }
 
 func TestI64Add(t *testing.T) {
 	xs := makeIbuffer(100)
 	res := make([]int64, 50)
 	fmt.Printf("sum: %v\n", Int64Add(xs[:50], xs[50:], res))
-	fmt.Printf("pure sum: %v\n", int64Add(xs[:50], xs[50:], res))
+	fmt.Printf("pure sum: %v\n", numericAdd(xs[:50], xs[50:], res))
 }
diff --git a/pkg/vectorize/and/x86.s b/pkg/vectorize/and/and_amd64.s
similarity index 97%
rename from pkg/vectorize/and/x86.s
rename to pkg/vectorize/and/and_amd64.s
index 00b8af9762f719abee9253f4dfc76d91c7decd60..b933411a8e74a7a0045493855dbcae3ab549fa98 100644
--- a/pkg/vectorize/and/x86.s
+++ b/pkg/vectorize/and/and_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run x86.go -out x86.s -stubs x86_stubs.go. DO NOT EDIT.
-// +build 386 amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/div/div.go b/pkg/vectorize/div/div.go
index d88b69456a29ce52acb46874be8568618cf57a65..e5a3e8f70b12572ffd55bb46033e6958135d88e3 100644
--- a/pkg/vectorize/div/div.go
+++ b/pkg/vectorize/div/div.go
@@ -14,685 +14,179 @@
 
 package div
 
-import "github.com/matrixorigin/matrixone/pkg/container/types"
-
-var (
-	Int8Div                func([]int8, []int8, []int8) []int8
-	Int8DivSels            func([]int8, []int8, []int8, []int64) []int8
-	Int8DivScalar          func(int8, []int8, []int8) []int8
-	Int8DivScalarSels      func(int8, []int8, []int8, []int64) []int8
-	Int8DivByScalar        func(int8, []int8, []int8) []int8
-	Int8DivByScalarSels    func(int8, []int8, []int8, []int64) []int8
-	Int16Div               func([]int16, []int16, []int16) []int16
-	Int16DivSels           func([]int16, []int16, []int16, []int64) []int16
-	Int16DivScalar         func(int16, []int16, []int16) []int16
-	Int16DivScalarSels     func(int16, []int16, []int16, []int64) []int16
-	Int16DivByScalar       func(int16, []int16, []int16) []int16
-	Int16DivByScalarSels   func(int16, []int16, []int16, []int64) []int16
-	Int32Div               func([]int32, []int32, []int32) []int32
-	Int32DivSels           func([]int32, []int32, []int32, []int64) []int32
-	Int32DivScalar         func(int32, []int32, []int32) []int32
-	Int32DivScalarSels     func(int32, []int32, []int32, []int64) []int32
-	Int32DivByScalar       func(int32, []int32, []int32) []int32
-	Int32DivByScalarSels   func(int32, []int32, []int32, []int64) []int32
-	Int64Div               func([]int64, []int64, []int64) []int64
-	Int64DivSels           func([]int64, []int64, []int64, []int64) []int64
-	Int64DivScalar         func(int64, []int64, []int64) []int64
-	Int64DivScalarSels     func(int64, []int64, []int64, []int64) []int64
-	Int64DivByScalar       func(int64, []int64, []int64) []int64
-	Int64DivByScalarSels   func(int64, []int64, []int64, []int64) []int64
-	Uint8Div               func([]uint8, []uint8, []uint8) []uint8
-	Uint8DivSels           func([]uint8, []uint8, []uint8, []int64) []uint8
-	Uint8DivScalar         func(uint8, []uint8, []uint8) []uint8
-	Uint8DivScalarSels     func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint8DivByScalar       func(uint8, []uint8, []uint8) []uint8
-	Uint8DivByScalarSels   func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint16Div              func([]uint16, []uint16, []uint16) []uint16
-	Uint16DivSels          func([]uint16, []uint16, []uint16, []int64) []uint16
-	Uint16DivScalar        func(uint16, []uint16, []uint16) []uint16
-	Uint16DivScalarSels    func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint16DivByScalar      func(uint16, []uint16, []uint16) []uint16
-	Uint16DivByScalarSels  func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint32Div              func([]uint32, []uint32, []uint32) []uint32
-	Uint32DivSels          func([]uint32, []uint32, []uint32, []int64) []uint32
-	Uint32DivScalar        func(uint32, []uint32, []uint32) []uint32
-	Uint32DivScalarSels    func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint32DivByScalar      func(uint32, []uint32, []uint32) []uint32
-	Uint32DivByScalarSels  func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint64Div              func([]uint64, []uint64, []uint64) []uint64
-	Uint64DivSels          func([]uint64, []uint64, []uint64, []int64) []uint64
-	Uint64DivScalar        func(uint64, []uint64, []uint64) []uint64
-	Uint64DivScalarSels    func(uint64, []uint64, []uint64, []int64) []uint64
-	Uint64DivByScalar      func(uint64, []uint64, []uint64) []uint64
-	Uint64DivByScalarSels  func(uint64, []uint64, []uint64, []int64) []uint64
-	Float32Div             func([]float32, []float32, []float32) []float32
-	Float32DivSels         func([]float32, []float32, []float32, []int64) []float32
-	Float32DivScalar       func(float32, []float32, []float32) []float32
-	Float32DivScalarSels   func(float32, []float32, []float32, []int64) []float32
-	Float32DivByScalar     func(float32, []float32, []float32) []float32
-	Float32DivByScalarSels func(float32, []float32, []float32, []int64) []float32
-	Float64Div             func([]float64, []float64, []float64) []float64
-	Float64DivSels         func([]float64, []float64, []float64, []int64) []float64
-	Float64DivScalar       func(float64, []float64, []float64) []float64
-	Float64DivScalarSels   func(float64, []float64, []float64, []int64) []float64
-	Float64DivByScalar     func(float64, []float64, []float64) []float64
-	Float64DivByScalarSels func(float64, []float64, []float64, []int64) []float64
-
-	Float32IntegerDiv             func([]float32, []float32, []int64) []int64
-	Float32IntegerDivSels         func([]float32, []float32, []int64, []int64) []int64
-	Float32IntegerDivScalar       func(float32, []float32, []int64) []int64
-	Float32IntegerDivScalarSels   func(float32, []float32, []int64, []int64) []int64
-	Float32IntegerDivByScalar     func(float32, []float32, []int64) []int64
-	Float32IntegerDivByScalarSels func(float32, []float32, []int64, []int64) []int64
-
-	Float64IntegerDiv             func([]float64, []float64, []int64) []int64
-	Float64IntegerDivSels         func([]float64, []float64, []int64, []int64) []int64
-	Float64IntegerDivScalar       func(float64, []float64, []int64) []int64
-	Float64IntegerDivScalarSels   func(float64, []float64, []int64, []int64) []int64
-	Float64IntegerDivByScalar     func(float64, []float64, []int64) []int64
-	Float64IntegerDivByScalarSels func(float64, []float64, []int64, []int64) []int64
-
-	Decimal64Div              func([]types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal64DivSels          func([]types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal64DivScalar        func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal64DivScalarSels    func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal64DivByScalar      func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal64DivByScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128Div             func([]types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128DivSels         func([]types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128DivScalar       func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128DivScalarSels   func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128DivByScalar     func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128DivByScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
+import (
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
 )
 
-func init() {
-	Int8Div = int8Div
-	Int8DivSels = int8DivSels
-	Int8DivScalar = int8DivScalar
-	Int8DivScalarSels = int8DivScalarSels
-	Int8DivByScalar = int8DivByScalar
-	Int8DivByScalarSels = int8DivByScalarSels
-	Int16Div = int16Div
-	Int16DivSels = int16DivSels
-	Int16DivScalar = int16DivScalar
-	Int16DivScalarSels = int16DivScalarSels
-	Int16DivByScalar = int16DivByScalar
-	Int16DivByScalarSels = int16DivByScalarSels
-	Int32Div = int32Div
-	Int32DivSels = int32DivSels
-	Int32DivScalar = int32DivScalar
-	Int32DivScalarSels = int32DivScalarSels
-	Int32DivByScalar = int32DivByScalar
-	Int32DivByScalarSels = int32DivByScalarSels
-	Int64Div = int64Div
-	Int64DivSels = int64DivSels
-	Int64DivScalar = int64DivScalar
-	Int64DivScalarSels = int64DivScalarSels
-	Int64DivByScalar = int64DivByScalar
-	Int64DivByScalarSels = int64DivByScalarSels
-	Uint8Div = uint8Div
-	Uint8DivSels = uint8DivSels
-	Uint8DivScalar = uint8DivScalar
-	Uint8DivScalarSels = uint8DivScalarSels
-	Uint8DivByScalar = uint8DivByScalar
-	Uint8DivByScalarSels = uint8DivByScalarSels
-	Uint16Div = uint16Div
-	Uint16DivSels = uint16DivSels
-	Uint16DivScalar = uint16DivScalar
-	Uint16DivScalarSels = uint16DivScalarSels
-	Uint16DivByScalar = uint16DivByScalar
-	Uint16DivByScalarSels = uint16DivByScalarSels
-	Uint32Div = uint32Div
-	Uint32DivSels = uint32DivSels
-	Uint32DivScalar = uint32DivScalar
-	Uint32DivScalarSels = uint32DivScalarSels
-	Uint32DivByScalar = uint32DivByScalar
-	Uint32DivByScalarSels = uint32DivByScalarSels
-	Uint64Div = uint64Div
-	Uint64DivSels = uint64DivSels
-	Uint64DivScalar = uint64DivScalar
-	Uint64DivScalarSels = uint64DivScalarSels
-	Uint64DivByScalar = uint64DivByScalar
-	Uint64DivByScalarSels = uint64DivByScalarSels
-	Float32Div = float32Div
-	Float32DivSels = float32DivSels
-	Float32DivScalar = float32DivScalar
-	Float32DivScalarSels = float32DivScalarSels
-	Float32DivByScalar = float32DivByScalar
-	Float32DivByScalarSels = float32DivByScalarSels
-	Float64Div = float64Div
-	Float64DivSels = float64DivSels
-	Float64DivScalar = float64DivScalar
-	Float64DivScalarSels = float64DivScalarSels
-	Float64DivByScalar = float64DivByScalar
-	Float64DivByScalarSels = float64DivByScalarSels
-	Decimal64Div = decimal64Div
-	Decimal64DivSels = decimal64DivSels
-	Decimal64DivScalar = decimal64DivScalar
-	Decimal64DivScalarSels = decimal64DivScalarSels
-	Decimal64DivByScalar = decimal64DivByScalar
-	Decimal64DivByScalarSels = decimal64DivByScalarSels
-	Decimal128Div = decimal128Div
-	Decimal128DivSels = decimal128DivSels
-	Decimal128DivScalar = decimal128DivScalar
-	Decimal128DivScalarSels = decimal128DivScalarSels
-	Decimal128DivByScalar = decimal128DivByScalar
+var (
+	Int8Div                = numericDiv[int8]
+	Int8DivSels            = numericDivSels[int8]
+	Int8DivScalar          = numericDivScalar[int8]
+	Int8DivScalarSels      = numericDivScalarSels[int8]
+	Int8DivByScalar        = numericDivByScalar[int8]
+	Int8DivByScalarSels    = numericDivByScalarSels[int8]
+	Int16Div               = numericDiv[int16]
+	Int16DivSels           = numericDivSels[int16]
+	Int16DivScalar         = numericDivScalar[int16]
+	Int16DivScalarSels     = numericDivScalarSels[int16]
+	Int16DivByScalar       = numericDivByScalar[int16]
+	Int16DivByScalarSels   = numericDivByScalarSels[int16]
+	Int32Div               = numericDiv[int32]
+	Int32DivSels           = numericDivSels[int32]
+	Int32DivScalar         = numericDivScalar[int32]
+	Int32DivScalarSels     = numericDivScalarSels[int32]
+	Int32DivByScalar       = numericDivByScalar[int32]
+	Int32DivByScalarSels   = numericDivByScalarSels[int32]
+	Int64Div               = numericDiv[int64]
+	Int64DivSels           = numericDivSels[int64]
+	Int64DivScalar         = numericDivScalar[int64]
+	Int64DivScalarSels     = numericDivScalarSels[int64]
+	Int64DivByScalar       = numericDivByScalar[int64]
+	Int64DivByScalarSels   = numericDivByScalarSels[int64]
+	Uint8Div               = numericDiv[uint8]
+	Uint8DivSels           = numericDivSels[uint8]
+	Uint8DivScalar         = numericDivScalar[uint8]
+	Uint8DivScalarSels     = numericDivScalarSels[uint8]
+	Uint8DivByScalar       = numericDivByScalar[uint8]
+	Uint8DivByScalarSels   = numericDivByScalarSels[uint8]
+	Uint16Div              = numericDiv[uint16]
+	Uint16DivSels          = numericDivSels[uint16]
+	Uint16DivScalar        = numericDivScalar[uint16]
+	Uint16DivScalarSels    = numericDivScalarSels[uint16]
+	Uint16DivByScalar      = numericDivByScalar[uint16]
+	Uint16DivByScalarSels  = numericDivByScalarSels[uint16]
+	Uint32Div              = numericDiv[uint32]
+	Uint32DivSels          = numericDivSels[uint32]
+	Uint32DivScalar        = numericDivScalar[uint32]
+	Uint32DivScalarSels    = numericDivScalarSels[uint32]
+	Uint32DivByScalar      = numericDivByScalar[uint32]
+	Uint32DivByScalarSels  = numericDivByScalarSels[uint32]
+	Uint64Div              = numericDiv[uint64]
+	Uint64DivSels          = numericDivSels[uint64]
+	Uint64DivScalar        = numericDivScalar[uint64]
+	Uint64DivScalarSels    = numericDivScalarSels[uint64]
+	Uint64DivByScalar      = numericDivByScalar[uint64]
+	Uint64DivByScalarSels  = numericDivByScalarSels[uint64]
+	Float32Div             = numericDiv[float32]
+	Float32DivSels         = numericDivSels[float32]
+	Float32DivScalar       = numericDivScalar[float32]
+	Float32DivScalarSels   = numericDivScalarSels[float32]
+	Float32DivByScalar     = numericDivByScalar[float32]
+	Float32DivByScalarSels = numericDivByScalarSels[float32]
+	Float64Div             = numericDiv[float64]
+	Float64DivSels         = numericDivSels[float64]
+	Float64DivScalar       = numericDivScalar[float64]
+	Float64DivScalarSels   = numericDivScalarSels[float64]
+	Float64DivByScalar     = numericDivByScalar[float64]
+	Float64DivByScalarSels = numericDivByScalarSels[float64]
+
+	Decimal64Div              = decimal64Div
+	Decimal64DivSels          = decimal64DivSels
+	Decimal64DivScalar        = decimal64DivScalar
+	Decimal64DivScalarSels    = decimal64DivScalarSels
+	Decimal64DivByScalar      = decimal64DivByScalar
+	Decimal64DivByScalarSels  = decimal64DivByScalarSels
+	Decimal128Div             = decimal128Div
+	Decimal128DivSels         = decimal128DivSels
+	Decimal128DivScalar       = decimal128DivScalar
+	Decimal128DivScalarSels   = decimal128DivScalarSels
+	Decimal128DivByScalar     = decimal128DivByScalar
 	Decimal128DivByScalarSels = decimal128DivByScalarSels
 
-	Float32IntegerDiv = float32IntegerDiv
-	Float32IntegerDivSels = float32IntegerDivSels
-	Float32IntegerDivScalar = float32IntegerDivScalar
-	Float32IntegerDivScalarSels = float32IntegerDivScalarSels
-	Float32IntegerDivByScalar = float32IntegerDivByScalar
-	Float32IntegerDivByScalarSels = float32IntegerDivByScalarSels
-
-	Float64IntegerDiv = float64IntegerDiv
-	Float64IntegerDivSels = float64IntegerDivSels
-	Float64IntegerDivScalar = float64IntegerDivScalar
-	Float64IntegerDivScalarSels = float64IntegerDivScalarSels
-	Float64IntegerDivByScalar = float64IntegerDivByScalar
-	Float64IntegerDivByScalarSels = float64IntegerDivByScalarSels
-}
-
-func int8Div(xs, ys, rs []int8) []int8 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func int8DivSels(xs, ys, rs []int8, sels []int64) []int8 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func int8DivScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func int8DivScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func int8DivByScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func int8DivByScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func int16Div(xs, ys, rs []int16) []int16 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func int16DivSels(xs, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func int16DivScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func int16DivScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func int16DivByScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func int16DivByScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func int32Div(xs, ys, rs []int32) []int32 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func int32DivSels(xs, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func int32DivScalar(x int32, ys, rs []int32) []int32 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func int32DivScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func int32DivByScalar(x int32, ys, rs []int32) []int32 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func int32DivByScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func int64Div(xs, ys, rs []int64) []int64 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func int64DivSels(xs, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func int64DivScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func int64DivScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func int64DivByScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func int64DivByScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func uint8Div(xs, ys, rs []uint8) []uint8 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func uint8DivSels(xs, ys, rs []uint8, sels []int64) []uint8 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func uint8DivScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func uint8DivScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func uint8DivByScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func uint8DivByScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func uint16Div(xs, ys, rs []uint16) []uint16 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func uint16DivSels(xs, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func uint16DivScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func uint16DivScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func uint16DivByScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func uint16DivByScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func uint32Div(xs, ys, rs []uint32) []uint32 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func uint32DivSels(xs, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func uint32DivScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func uint32DivScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func uint32DivByScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func uint32DivByScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func uint64Div(xs, ys, rs []uint64) []uint64 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func uint64DivSels(xs, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func uint64DivScalar(x uint64, ys, rs []uint64) []uint64 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func uint64DivScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func uint64DivByScalar(x uint64, ys, rs []uint64) []uint64 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func uint64DivByScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
-
-func float32Div(xs, ys, rs []float32) []float32 {
-	for i, x := range xs {
-		rs[i] = x / ys[i]
-	}
-	return rs
-}
-
-func float32DivSels(xs, ys, rs []float32, sels []int64) []float32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] / ys[sel]
-	}
-	return rs
-}
-
-func float32DivScalar(x float32, ys, rs []float32) []float32 {
-	for i, y := range ys {
-		rs[i] = x / y
-	}
-	return rs
-}
-
-func float32DivScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
-	for _, sel := range sels {
-		rs[sel] = x / ys[sel]
-	}
-	return rs
-}
-
-func float32DivByScalar(x float32, ys, rs []float32) []float32 {
-	for i, y := range ys {
-		rs[i] = y / x
-	}
-	return rs
-}
-
-func float32DivByScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] / x
-	}
-	return rs
-}
+	Float32IntegerDiv             = floatIntegerDiv[float32]
+	Float32IntegerDivSels         = floatIntegerDivSels[float32]
+	Float32IntegerDivScalar       = floatIntegerDivScalar[float32]
+	Float32IntegerDivScalarSels   = floatIntegerDivScalarSels[float32]
+	Float32IntegerDivByScalar     = floatIntegerDivByScalar[float32]
+	Float32IntegerDivByScalarSels = floatIntegerDivByScalarSels[float32]
+
+	Float64IntegerDiv             = floatIntegerDiv[float64]
+	Float64IntegerDivSels         = floatIntegerDivSels[float64]
+	Float64IntegerDivScalar       = floatIntegerDivScalar[float64]
+	Float64IntegerDivScalarSels   = floatIntegerDivScalarSels[float64]
+	Float64IntegerDivByScalar     = floatIntegerDivByScalar[float64]
+	Float64IntegerDivByScalarSels = floatIntegerDivByScalarSels[float64]
+)
 
-func float64Div(xs, ys, rs []float64) []float64 {
+func numericDiv[T constraints.Integer | constraints.Float](xs, ys, rs []T) []T {
 	for i, x := range xs {
 		rs[i] = x / ys[i]
 	}
 	return rs
 }
 
-func float64DivSels(xs, ys, rs []float64, sels []int64) []float64 {
+func numericDivSels[T constraints.Integer | constraints.Float](xs, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = xs[sel] / ys[sel]
 	}
 	return rs
 }
 
-func float64DivScalar(x float64, ys, rs []float64) []float64 {
+func numericDivScalar[T constraints.Integer | constraints.Float](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = x / y
 	}
 	return rs
 }
 
-func float64DivScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func numericDivScalarSels[T constraints.Integer | constraints.Float](x T, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = x / ys[sel]
 	}
 	return rs
 }
 
-func float64DivByScalar(x float64, ys, rs []float64) []float64 {
+func numericDivByScalar[T constraints.Integer | constraints.Float](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = y / x
 	}
 	return rs
 }
 
-func float64DivByScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func numericDivByScalarSels[T constraints.Integer | constraints.Float](x T, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = ys[sel] / x
 	}
 	return rs
 }
 
-func float32IntegerDiv(xs, ys []float32, rs []int64) []int64 {
-	for i, x := range xs {
-		rs[i] = int64(x / ys[i])
-	}
-	return rs
-}
-
-func float32IntegerDivSels(xs, ys []float32, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(xs[sel] / ys[sel])
-	}
-	return rs
-}
-
-func float32IntegerDivScalar(x float32, ys []float32, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = int64(x / y)
-	}
-	return rs
-}
-
-func float32IntegerDivScalarSels(x float32, ys []float32, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(x / ys[sel])
-	}
-	return rs
-}
-
-func float32IntegerDivByScalar(x float32, ys []float32, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = int64(y / x)
-	}
-	return rs
-}
-
-func float32IntegerDivByScalarSels(x float32, ys []float32, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = int64(ys[sel] / x)
-	}
-	return rs
-}
-
-func float64IntegerDiv(xs, ys []float64, rs []int64) []int64 {
+func floatIntegerDiv[T constraints.Float](xs, ys []T, rs []int64) []int64 {
 	for i, x := range xs {
 		rs[i] = int64(x / ys[i])
 	}
 	return rs
 }
 
-func float64IntegerDivSels(xs, ys []float64, rs []int64, sels []int64) []int64 {
+func floatIntegerDivSels[T constraints.Float](xs, ys []T, rs []int64, sels []int64) []int64 {
 	for _, sel := range sels {
 		rs[sel] = int64(xs[sel] / ys[sel])
 	}
 	return rs
 }
 
-func float64IntegerDivScalar(x float64, ys []float64, rs []int64) []int64 {
+func floatIntegerDivScalar[T constraints.Float](x T, ys []T, rs []int64) []int64 {
 	for i, y := range ys {
 		rs[i] = int64(x / y)
 	}
 	return rs
 }
 
-func float64IntegerDivScalarSels(x float64, ys []float64, rs []int64, sels []int64) []int64 {
+func floatIntegerDivScalarSels[T constraints.Float](x T, ys []T, rs []int64, sels []int64) []int64 {
 	for _, sel := range sels {
 		rs[sel] = int64(x / ys[sel])
 	}
 	return rs
 }
 
-func float64IntegerDivByScalar(x float64, ys []float64, rs []int64) []int64 {
+func floatIntegerDivByScalar[T constraints.Float](x T, ys []T, rs []int64) []int64 {
 	for i, y := range ys {
 		rs[i] = int64(y / x)
 	}
 	return rs
 }
 
-func float64IntegerDivByScalarSels(x float64, ys []float64, rs []int64, sels []int64) []int64 {
+func floatIntegerDivByScalarSels[T constraints.Float](x T, ys []T, rs []int64, sels []int64) []int64 {
 	for _, sel := range sels {
 		rs[sel] = int64(ys[sel] / x)
 	}
diff --git a/pkg/vectorize/div/div_test.go b/pkg/vectorize/div/div_test.go
index 7b19266c440cab65243978ffd2ac417b428ab0f7..03d73da6f0b0a2d092743d4c008d409a30e82fb3 100644
--- a/pkg/vectorize/div/div_test.go
+++ b/pkg/vectorize/div/div_test.go
@@ -1,15 +1,16 @@
 package div
 
 import (
-	"github.com/stretchr/testify/require"
 	"testing"
+
+	"github.com/stretchr/testify/require"
 )
 
 func TestInt8Div(t *testing.T) {
 	xs := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	ys := []int8{4, 3, 2, 1, -1, -2, -3, -4, -5, -6}
 	rs := make([]int8, len(xs))
-	rs = int8Div(xs, ys, rs)
+	rs = Int8Div(xs, ys, rs)
 	rsCorrect := make([]int8, len(xs))
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
@@ -22,7 +23,7 @@ func TestInt8DivSels(t *testing.T) {
 	ys := []int8{4, 3, 2, 1, -1, -2, -3, -4, -5, -6}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]int8, len(xs))
-	rs = int8DivSels(xs, ys, rs, selects)
+	rs = Int8DivSels(xs, ys, rs, selects)
 	rsCorrect := make([]int8, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
@@ -34,7 +35,7 @@ func TestInt8DivScalar(t *testing.T) {
 	x := int8(6)
 	ys := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]int8, len(ys))
-	rs = int8DivScalar(x, ys, rs)
+	rs = Int8DivScalar(x, ys, rs)
 	rsCorrect := make([]int8, len(ys))
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
@@ -47,7 +48,7 @@ func TestInt8DivScalarSels(t *testing.T) {
 	ys := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]int8, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = int8DivScalarSels(x, ys, rs, selects)
+	rs = Int8DivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]int8, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
@@ -59,7 +60,7 @@ func TestInt8DivByScalar(t *testing.T) {
 	x := int8(6)
 	ys := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]int8, len(ys))
-	rs = int8DivByScalar(x, ys, rs)
+	rs = Int8DivByScalar(x, ys, rs)
 	rsCorrect := make([]int8, len(ys))
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
@@ -72,7 +73,7 @@ func TestInt8DivByScalarSels(t *testing.T) {
 	ys := []int8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]int8, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = int8DivByScalarSels(x, ys, rs, selects)
+	rs = Int8DivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]int8, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = ys[sel] / x
@@ -85,7 +86,7 @@ func TestInt16Div(t *testing.T) {
 	ys := []int16{10, 16, 29, 65, 5, 1, 4}
 	rs := make([]int16, len(ys))
 	rsCorrect := make([]int16, len(ys))
-	rs = int16Div(xs, ys, rs)
+	rs = Int16Div(xs, ys, rs)
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
 	}
@@ -99,7 +100,7 @@ func TestInt16DivSels(t *testing.T) {
 
 	rsCorrect := make([]int16, len(ys))
 	selects := []int64{1, 3, 5}
-	rs = int16DivSels(xs, ys, rs, selects)
+	rs = Int16DivSels(xs, ys, rs, selects)
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
 	}
@@ -111,7 +112,7 @@ func TestInt16DivScalar(t *testing.T) {
 	ys := []int16{-500, -345, 123, 345, 567, 7890, 15}
 	rs := make([]int16, len(ys))
 	rsCorrect := make([]int16, len(ys))
-	rs = int16DivScalar(x, ys, rs)
+	rs = Int16DivScalar(x, ys, rs)
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
 	}
@@ -124,7 +125,7 @@ func TestInt16DivScalarSels(t *testing.T) {
 	selects := []int64{1, 3, 5}
 	rs := make([]int16, len(ys))
 	rsCorrect := make([]int16, len(ys))
-	rs = int16DivScalarSels(x, ys, rs, selects)
+	rs = Int16DivScalarSels(x, ys, rs, selects)
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
 	}
@@ -136,7 +137,7 @@ func TestInt16DivByScalar(t *testing.T) {
 	ys := []int16{-500, -345, 123, 345, 567, 7890, 15}
 	rs := make([]int16, len(ys))
 	rsCorrect := make([]int16, len(ys))
-	rs = int16DivByScalar(x, ys, rs)
+	rs = Int16DivByScalar(x, ys, rs)
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
 	}
@@ -148,7 +149,7 @@ func TestInt32Div(t *testing.T) {
 	ys := []int16{10, 16, 29, 65, 5, 1, 4}
 	rs := make([]int16, len(ys))
 	rsCorrect := make([]int16, len(ys))
-	rs = int16Div(xs, ys, rs)
+	rs = Int16Div(xs, ys, rs)
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
 	}
@@ -162,7 +163,7 @@ func TestInt32DivSels(t *testing.T) {
 
 	rsCorrect := make([]int32, len(ys))
 	selects := []int64{1, 3, 5}
-	rs = int32DivSels(xs, ys, rs, selects)
+	rs = Int32DivSels(xs, ys, rs, selects)
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
 	}
@@ -174,7 +175,7 @@ func TestInt32DivScalar(t *testing.T) {
 	ys := []int32{-500, -345, 123, 345, 567, 7890, 15}
 	rs := make([]int32, len(ys))
 	rsCorrect := make([]int32, len(ys))
-	rs = int32DivScalar(x, ys, rs)
+	rs = Int32DivScalar(x, ys, rs)
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
 	}
@@ -187,7 +188,7 @@ func TestInt32DivScalarSels(t *testing.T) {
 	selects := []int64{1, 3, 5}
 	rs := make([]int32, len(ys))
 	rsCorrect := make([]int32, len(ys))
-	rs = int32DivScalarSels(x, ys, rs, selects)
+	rs = Int32DivScalarSels(x, ys, rs, selects)
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
 	}
@@ -199,7 +200,7 @@ func TestInt32DivByScalar(t *testing.T) {
 	ys := []int32{-500, -345, 123, 345, 567, 7890, 15}
 	rs := make([]int32, len(ys))
 	rsCorrect := make([]int32, len(ys))
-	rs = int32DivByScalar(x, ys, rs)
+	rs = Int32DivByScalar(x, ys, rs)
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
 	}
@@ -211,7 +212,7 @@ func TestInt64DivScalar(t *testing.T) {
 	ys := []int64{-500, -345, 123, 345, 567, 7890, 15}
 	rs := make([]int64, len(ys))
 	rsCorrect := make([]int64, len(ys))
-	rs = int64DivScalar(x, ys, rs)
+	rs = Int64DivScalar(x, ys, rs)
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
 	}
@@ -224,7 +225,7 @@ func TestInt64DivScalarSels(t *testing.T) {
 	selects := []int64{1, 3, 5}
 	rs := make([]int64, len(ys))
 	rsCorrect := make([]int64, len(ys))
-	rs = int64DivScalarSels(x, ys, rs, selects)
+	rs = Int64DivScalarSels(x, ys, rs, selects)
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
 	}
@@ -236,7 +237,7 @@ func TestInt64DivByScalar(t *testing.T) {
 	ys := []int64{-500, -345, 123, 345, 567, 7890, 15}
 	rs := make([]int64, len(ys))
 	rsCorrect := make([]int64, len(ys))
-	rs = int64DivByScalar(x, ys, rs)
+	rs = Int64DivByScalar(x, ys, rs)
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
 	}
@@ -247,7 +248,7 @@ func TestUint8Div(t *testing.T) {
 	xs := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	ys := []uint8{4, 3, 2, 1, 9, 12, 13, 24, 55, 96}
 	rs := make([]uint8, len(xs))
-	rs = uint8Div(xs, ys, rs)
+	rs = Uint8Div(xs, ys, rs)
 	rsCorrect := make([]uint8, len(xs))
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
@@ -260,7 +261,7 @@ func TestUint8DivSels(t *testing.T) {
 	ys := []uint8{4, 3, 2, 1, 9, 12, 13, 24, 55, 96}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]uint8, len(xs))
-	rs = uint8DivSels(xs, ys, rs, selects)
+	rs = Uint8DivSels(xs, ys, rs, selects)
 	rsCorrect := make([]uint8, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
@@ -272,7 +273,7 @@ func TestUint8DivScalar(t *testing.T) {
 	x := uint8(6)
 	ys := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]uint8, len(ys))
-	rs = uint8DivScalar(x, ys, rs)
+	rs = Uint8DivScalar(x, ys, rs)
 	rsCorrect := make([]uint8, len(ys))
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
@@ -285,7 +286,7 @@ func TestUint8DivScalarSels(t *testing.T) {
 	ys := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]uint8, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint8DivScalarSels(x, ys, rs, selects)
+	rs = Uint8DivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint8, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
@@ -297,7 +298,7 @@ func TestUint8DivByScalar(t *testing.T) {
 	x := uint8(6)
 	ys := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]uint8, len(ys))
-	rs = uint8DivByScalar(x, ys, rs)
+	rs = Uint8DivByScalar(x, ys, rs)
 	rsCorrect := make([]uint8, len(ys))
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
@@ -310,7 +311,7 @@ func TestUint8DivByScalarSels(t *testing.T) {
 	ys := []uint8{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
 	rs := make([]uint8, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint8DivByScalarSels(x, ys, rs, selects)
+	rs = Uint8DivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint8, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = ys[sel] / x
@@ -322,7 +323,7 @@ func TestUint16Div(t *testing.T) {
 	xs := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	ys := []uint16{4, 3, 2, 1, 9, 12, 13, 24, 55, 96, 12, 10, 9, 6, 5, 15}
 	rs := make([]uint16, len(xs))
-	rs = uint16Div(xs, ys, rs)
+	rs = Uint16Div(xs, ys, rs)
 	rsCorrect := make([]uint16, len(xs))
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
@@ -335,7 +336,7 @@ func TestUint16DivSels(t *testing.T) {
 	ys := []uint16{4, 3, 2, 1, 9, 12, 13, 24, 55, 96, 12, 10, 9, 6, 5, 15}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]uint16, len(xs))
-	rs = uint16DivSels(xs, ys, rs, selects)
+	rs = Uint16DivSels(xs, ys, rs, selects)
 	rsCorrect := make([]uint16, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
@@ -347,7 +348,7 @@ func TestUint16DivScalar(t *testing.T) {
 	x := uint16(6)
 	ys := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint16, len(ys))
-	rs = uint16DivScalar(x, ys, rs)
+	rs = Uint16DivScalar(x, ys, rs)
 	rsCorrect := make([]uint16, len(ys))
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
@@ -360,7 +361,7 @@ func TestUint16DivScalarSels(t *testing.T) {
 	ys := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint16, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint16DivScalarSels(x, ys, rs, selects)
+	rs = Uint16DivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint16, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
@@ -372,7 +373,7 @@ func TestUint16DivByScalar(t *testing.T) {
 	x := uint16(6)
 	ys := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint16, len(ys))
-	rs = uint16DivByScalar(x, ys, rs)
+	rs = Uint16DivByScalar(x, ys, rs)
 	rsCorrect := make([]uint16, len(ys))
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
@@ -385,7 +386,7 @@ func TestUint16DivByScalarSels(t *testing.T) {
 	ys := []uint16{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint16, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint16DivByScalarSels(x, ys, rs, selects)
+	rs = Uint16DivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint16, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = ys[sel] / x
@@ -397,7 +398,7 @@ func TestUint32Div(t *testing.T) {
 	xs := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	ys := []uint32{4, 3, 2, 1, 9, 12, 13, 24, 55, 96, 12, 10, 9, 6, 5, 15}
 	rs := make([]uint32, len(xs))
-	rs = uint32Div(xs, ys, rs)
+	rs = Uint32Div(xs, ys, rs)
 	rsCorrect := make([]uint32, len(xs))
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
@@ -410,7 +411,7 @@ func TestUint32DivSels(t *testing.T) {
 	ys := []uint32{4, 3, 2, 1, 9, 12, 13, 24, 55, 96, 12, 10, 9, 6, 5, 15}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]uint32, len(xs))
-	rs = uint32DivSels(xs, ys, rs, selects)
+	rs = Uint32DivSels(xs, ys, rs, selects)
 	rsCorrect := make([]uint32, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
@@ -422,7 +423,7 @@ func TestUint32DivScalar(t *testing.T) {
 	x := uint32(6)
 	ys := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint32, len(ys))
-	rs = uint32DivScalar(x, ys, rs)
+	rs = Uint32DivScalar(x, ys, rs)
 	rsCorrect := make([]uint32, len(ys))
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
@@ -435,7 +436,7 @@ func TestUint32DivScalarSels(t *testing.T) {
 	ys := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint32, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint32DivScalarSels(x, ys, rs, selects)
+	rs = Uint32DivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint32, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
@@ -447,7 +448,7 @@ func TestUint32DivByScalar(t *testing.T) {
 	x := uint32(6)
 	ys := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint32, len(ys))
-	rs = uint32DivByScalar(x, ys, rs)
+	rs = Uint32DivByScalar(x, ys, rs)
 	rsCorrect := make([]uint32, len(ys))
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
@@ -460,7 +461,7 @@ func TestUint32DivByScalarSels(t *testing.T) {
 	ys := []uint32{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint32, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint32DivByScalarSels(x, ys, rs, selects)
+	rs = Uint32DivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint32, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = ys[sel] / x
@@ -472,7 +473,7 @@ func TestUint64Div(t *testing.T) {
 	xs := []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	ys := []uint64{4, 3, 2, 1, 9, 12, 13, 24, 55, 96, 12, 10, 9, 6, 5, 15}
 	rs := make([]uint64, len(xs))
-	rs = uint64Div(xs, ys, rs)
+	rs = Uint64Div(xs, ys, rs)
 	rsCorrect := make([]uint64, len(xs))
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
@@ -485,7 +486,7 @@ func TestUint64DivSels(t *testing.T) {
 	ys := []uint64{4, 3, 2, 1, 9, 12, 13, 24, 55, 96, 12, 10, 9, 6, 5, 15}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]uint64, len(xs))
-	rs = uint64DivSels(xs, ys, rs, selects)
+	rs = Uint64DivSels(xs, ys, rs, selects)
 	rsCorrect := make([]uint64, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
@@ -497,7 +498,7 @@ func TestUint64DivScalar(t *testing.T) {
 	x := uint64(6)
 	ys := []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint64, len(ys))
-	rs = uint64DivScalar(x, ys, rs)
+	rs = Uint64DivScalar(x, ys, rs)
 	rsCorrect := make([]uint64, len(ys))
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
@@ -510,7 +511,7 @@ func TestUint64DivScalarSels(t *testing.T) {
 	ys := []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint64, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint64DivScalarSels(x, ys, rs, selects)
+	rs = Uint64DivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint64, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
@@ -522,7 +523,7 @@ func TestUint64DivByScalar(t *testing.T) {
 	x := uint64(6)
 	ys := []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint64, len(ys))
-	rs = uint64DivByScalar(x, ys, rs)
+	rs = Uint64DivByScalar(x, ys, rs)
 	rsCorrect := make([]uint64, len(ys))
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
@@ -535,7 +536,7 @@ func TestUint64DivByScalarSels(t *testing.T) {
 	ys := []uint64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 44, 55, 1234, 12345}
 	rs := make([]uint64, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = uint64DivByScalarSels(x, ys, rs, selects)
+	rs = Uint64DivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]uint64, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = ys[sel] / x
@@ -547,7 +548,7 @@ func TestFloat32Div(t *testing.T) {
 	xs := []float32{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	ys := []float32{4, 3, 2, 1, 9, 12, 13.5, 24.5, 55.5, 96.6, 12.43, 10.5, 9.9, 6.2, 55, 15.6}
 	rs := make([]float32, len(xs))
-	rs = float32Div(xs, ys, rs)
+	rs = Float32Div(xs, ys, rs)
 	rsCorrect := make([]float32, len(xs))
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
@@ -560,7 +561,7 @@ func TestFloat32DivSels(t *testing.T) {
 	ys := []float32{4, 3, 2, 1, 9, 12, 13.5, 24.5, 55.5, 96.6, 12.43, 10.5, 9.9, 6.2, 55, 15.6}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]float32, len(xs))
-	rs = float32DivSels(xs, ys, rs, selects)
+	rs = Float32DivSels(xs, ys, rs, selects)
 	rsCorrect := make([]float32, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
@@ -572,7 +573,7 @@ func TestFloat32DivScalar(t *testing.T) {
 	x := float32(6)
 	ys := []float32{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float32, len(ys))
-	rs = float32DivScalar(x, ys, rs)
+	rs = Float32DivScalar(x, ys, rs)
 	rsCorrect := make([]float32, len(ys))
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
@@ -585,7 +586,7 @@ func TestFloat32DivScalarSels(t *testing.T) {
 	ys := []float32{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float32, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = float32DivScalarSels(x, ys, rs, selects)
+	rs = Float32DivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]float32, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
@@ -597,7 +598,7 @@ func TestFloat32DivByScalar(t *testing.T) {
 	x := float32(6)
 	ys := []float32{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float32, len(ys))
-	rs = float32DivByScalar(x, ys, rs)
+	rs = Float32DivByScalar(x, ys, rs)
 	rsCorrect := make([]float32, len(ys))
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
@@ -610,7 +611,7 @@ func TestFloat32DivByScalarSels(t *testing.T) {
 	ys := []float32{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float32, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = float32DivByScalarSels(x, ys, rs, selects)
+	rs = Float32DivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]float32, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = ys[sel] / x
@@ -622,7 +623,7 @@ func TestFloat64Div(t *testing.T) {
 	xs := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	ys := []float64{4, 3, 2, 1, 9, 12, 13.5, 24.5, 55.5, 96.6, 12.43, 10.5, 9.9, 6.2, 55, 15.6}
 	rs := make([]float64, len(xs))
-	rs = float64Div(xs, ys, rs)
+	rs = Float64Div(xs, ys, rs)
 	rsCorrect := make([]float64, len(xs))
 	for i := range xs {
 		rsCorrect[i] = xs[i] / ys[i]
@@ -635,7 +636,7 @@ func TestFloat64DivSels(t *testing.T) {
 	ys := []float64{4, 3, 2, 1, 9, 12, 13.5, 24.5, 55.5, 96.6, 12.43, 10.5, 9.9, 6.2, 55, 15.6}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]float64, len(xs))
-	rs = float64DivSels(xs, ys, rs, selects)
+	rs = Float64DivSels(xs, ys, rs, selects)
 	rsCorrect := make([]float64, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = xs[sel] / ys[sel]
@@ -647,7 +648,7 @@ func TestFloat64DivScalar(t *testing.T) {
 	x := float64(6)
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float64, len(ys))
-	rs = float64DivScalar(x, ys, rs)
+	rs = Float64DivScalar(x, ys, rs)
 	rsCorrect := make([]float64, len(ys))
 	for i := range ys {
 		rsCorrect[i] = x / ys[i]
@@ -660,7 +661,7 @@ func TestFloat64DivScalarSels(t *testing.T) {
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float64, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = float64DivScalarSels(x, ys, rs, selects)
+	rs = Float64DivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]float64, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = x / ys[sel]
@@ -672,7 +673,7 @@ func TestFloat64DivByScalar(t *testing.T) {
 	x := float64(6)
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float64, len(ys))
-	rs = float64DivByScalar(x, ys, rs)
+	rs = Float64DivByScalar(x, ys, rs)
 	rsCorrect := make([]float64, len(ys))
 	for i := range ys {
 		rsCorrect[i] = ys[i] / x
@@ -685,7 +686,7 @@ func TestFloat64DivByScalarSels(t *testing.T) {
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]float64, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = float64DivByScalarSels(x, ys, rs, selects)
+	rs = Float64DivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]float64, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = ys[sel] / x
@@ -697,7 +698,7 @@ func TestFloat64IntegerDiv(t *testing.T) {
 	xs := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	ys := []float64{4, 3, 2, 1, 9, 12, 13.5, 24.5, 55.5, 96.6, 12.43, 10.5, 9.9, 6.2, 55, 15.6}
 	rs := make([]int64, len(xs))
-	rs = float64IntegerDiv(xs, ys, rs)
+	rs = Float64IntegerDiv(xs, ys, rs)
 	rsCorrect := make([]int64, len(xs))
 	for i := range xs {
 		rsCorrect[i] = int64(xs[i] / ys[i])
@@ -710,7 +711,7 @@ func TestFloat64IntegerDivSels(t *testing.T) {
 	ys := []float64{4, 3, 2, 1, 9, 12, 13.5, 24.5, 55.5, 96.6, 12.43, 10.5, 9.9, 6.2, 55, 15.6}
 	selects := []int64{1, 3, 5, 6}
 	rs := make([]int64, len(xs))
-	rs = float64IntegerDivSels(xs, ys, rs, selects)
+	rs = Float64IntegerDivSels(xs, ys, rs, selects)
 	rsCorrect := make([]int64, len(xs))
 	for _, sel := range selects {
 		rsCorrect[sel] = int64(xs[sel] / ys[sel])
@@ -722,7 +723,7 @@ func TestFloat64IntegerDivScalar(t *testing.T) {
 	x := float64(6)
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]int64, len(ys))
-	rs = float64IntegerDivScalar(x, ys, rs)
+	rs = Float64IntegerDivScalar(x, ys, rs)
 	rsCorrect := make([]int64, len(ys))
 	for i := range ys {
 		rsCorrect[i] = int64(x / ys[i])
@@ -735,7 +736,7 @@ func TestFloat64IntegerDivScalarSels(t *testing.T) {
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]int64, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = float64IntegerDivScalarSels(x, ys, rs, selects)
+	rs = Float64IntegerDivScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]int64, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = int64(x / ys[sel])
@@ -747,7 +748,7 @@ func TestFloat64IntegerDivByScalar(t *testing.T) {
 	x := float64(6)
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]int64, len(ys))
-	rs = float64IntegerDivByScalar(x, ys, rs)
+	rs = Float64IntegerDivByScalar(x, ys, rs)
 	rsCorrect := make([]int64, len(ys))
 	for i := range ys {
 		rsCorrect[i] = int64(ys[i] / x)
@@ -760,7 +761,7 @@ func TestFloat64IntegerDivByScalarSels(t *testing.T) {
 	ys := []float64{1.5, 2.6, 35.6, 44.4, 55.9, 126, 77.7, 88.8, 99.9, 110, 220, 330, 440, 505, 12.3, 123.45}
 	rs := make([]int64, len(ys))
 	selects := []int64{1, 3, 5, 6}
-	rs = float64IntegerDivByScalarSels(x, ys, rs, selects)
+	rs = Float64IntegerDivByScalarSels(x, ys, rs, selects)
 	rsCorrect := make([]int64, len(ys))
 	for _, sel := range selects {
 		rsCorrect[sel] = int64(ys[sel] / x)
diff --git a/pkg/vectorize/eq/eq.go b/pkg/vectorize/eq/eq.go
index 5a943d4a090851ddbc128b43b8cfd625d9264f8a..e4e34cf7ccdcb1b232583bd5c7b704166ede7f00 100644
--- a/pkg/vectorize/eq/eq.go
+++ b/pkg/vectorize/eq/eq.go
@@ -16,1171 +16,125 @@ package eq
 
 import (
 	"bytes"
-	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"math"
 
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
+
 	roaring "github.com/RoaringBitmap/roaring/roaring64"
 )
 
 var (
-	Int8Eq                         func([]int8, []int8, []int64) []int64
-	Int8EqNullable                 func([]int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8EqSels                     func([]int8, []int8, []int64, []int64) []int64
-	Int8EqNullableSels             func([]int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int8EqScalar                   func(int8, []int8, []int64) []int64
-	Int8EqNullableScalar           func(int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8EqScalarSels               func(int8, []int8, []int64, []int64) []int64
-	Int8EqNullableScalarSels       func(int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int16Eq                        func([]int16, []int16, []int64) []int64
-	Int16EqNullable                func([]int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16EqSels                    func([]int16, []int16, []int64, []int64) []int64
-	Int16EqNullableSels            func([]int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int16EqScalar                  func(int16, []int16, []int64) []int64
-	Int16EqNullableScalar          func(int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16EqScalarSels              func(int16, []int16, []int64, []int64) []int64
-	Int16EqNullableScalarSels      func(int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int32Eq                        func([]int32, []int32, []int64) []int64
-	Int32EqNullable                func([]int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32EqSels                    func([]int32, []int32, []int64, []int64) []int64
-	Int32EqNullableSels            func([]int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int32EqScalar                  func(int32, []int32, []int64) []int64
-	Int32EqNullableScalar          func(int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32EqScalarSels              func(int32, []int32, []int64, []int64) []int64
-	Int32EqNullableScalarSels      func(int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int64Eq                        func([]int64, []int64, []int64) []int64
-	Int64EqNullable                func([]int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64EqSels                    func([]int64, []int64, []int64, []int64) []int64
-	Int64EqNullableSels            func([]int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Int64EqScalar                  func(int64, []int64, []int64) []int64
-	Int64EqNullableScalar          func(int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64EqScalarSels              func(int64, []int64, []int64, []int64) []int64
-	Int64EqNullableScalarSels      func(int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8Eq                        func([]uint8, []uint8, []int64) []int64
-	Uint8EqNullable                func([]uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8EqSels                    func([]uint8, []uint8, []int64, []int64) []int64
-	Uint8EqNullableSels            func([]uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8EqScalar                  func(uint8, []uint8, []int64) []int64
-	Uint8EqNullableScalar          func(uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8EqScalarSels              func(uint8, []uint8, []int64, []int64) []int64
-	Uint8EqNullableScalarSels      func(uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16Eq                       func([]uint16, []uint16, []int64) []int64
-	Uint16EqNullable               func([]uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16EqSels                   func([]uint16, []uint16, []int64, []int64) []int64
-	Uint16EqNullableSels           func([]uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16EqScalar                 func(uint16, []uint16, []int64) []int64
-	Uint16EqNullableScalar         func(uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16EqScalarSels             func(uint16, []uint16, []int64, []int64) []int64
-	Uint16EqNullableScalarSels     func(uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32Eq                       func([]uint32, []uint32, []int64) []int64
-	Uint32EqNullable               func([]uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32EqSels                   func([]uint32, []uint32, []int64, []int64) []int64
-	Uint32EqNullableSels           func([]uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32EqScalar                 func(uint32, []uint32, []int64) []int64
-	Uint32EqNullableScalar         func(uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32EqScalarSels             func(uint32, []uint32, []int64, []int64) []int64
-	Uint32EqNullableScalarSels     func(uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64Eq                       func([]uint64, []uint64, []int64) []int64
-	Uint64EqNullable               func([]uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64EqSels                   func([]uint64, []uint64, []int64, []int64) []int64
-	Uint64EqNullableSels           func([]uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64EqScalar                 func(uint64, []uint64, []int64) []int64
-	Uint64EqNullableScalar         func(uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64EqScalarSels             func(uint64, []uint64, []int64, []int64) []int64
-	Uint64EqNullableScalarSels     func(uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Float32Eq                      func([]float32, []float32, []int64) []int64
-	Float32EqNullable              func([]float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32EqSels                  func([]float32, []float32, []int64, []int64) []int64
-	Float32EqNullableSels          func([]float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float32EqScalar                func(float32, []float32, []int64) []int64
-	Float32EqNullableScalar        func(float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32EqScalarSels            func(float32, []float32, []int64, []int64) []int64
-	Float32EqNullableScalarSels    func(float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float64Eq                      func([]float64, []float64, []int64) []int64
-	Float64EqNullable              func([]float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64EqSels                  func([]float64, []float64, []int64, []int64) []int64
-	Float64EqNullableSels          func([]float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	Float64EqScalar                func(float64, []float64, []int64) []int64
-	Float64EqNullableScalar        func(float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64EqScalarSels            func(float64, []float64, []int64, []int64) []int64
-	Float64EqNullableScalarSels    func(float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	StrEq                          func(*types.Bytes, *types.Bytes, []int64) []int64
-	StrEqNullable                  func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrEqSels                      func(*types.Bytes, *types.Bytes, []int64, []int64) []int64
-	StrEqNullableSels              func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-	StrEqScalar                    func([]byte, *types.Bytes, []int64) []int64
-	StrEqNullableScalar            func([]byte, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrEqScalarSels                func([]byte, *types.Bytes, []int64, []int64) []int64
-	StrEqNullableScalarSels        func([]byte, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal64Eq                    func([]types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64EqNullable            func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64EqSels                func([]types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64EqNullableSels        func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal64EqScalar              func(types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64EqNullableScalar      func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64EqScalarSels          func(types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64EqNullableScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128Eq                   func([]types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128EqNullable           func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128EqSels               func([]types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128EqNullableSels       func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128EqScalar             func(types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128EqNullableScalar     func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128EqScalarSels         func(types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128EqNullableScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-)
-
-func init() {
-	Int8Eq = int8Eq
-	Int8EqNullable = int8EqNullable
-	Int8EqSels = int8EqSels
-	Int8EqNullableSels = int8EqNullableSels
-	Int8EqScalar = int8EqScalar
-	Int8EqNullableScalar = int8EqNullableScalar
-	Int8EqScalarSels = int8EqScalarSels
-	Int8EqNullableScalarSels = int8EqNullableScalarSels
-	Int16Eq = int16Eq
-	Int16EqNullable = int16EqNullable
-	Int16EqSels = int16EqSels
-	Int16EqNullableSels = int16EqNullableSels
-	Int16EqScalar = int16EqScalar
-	Int16EqNullableScalar = int16EqNullableScalar
-	Int16EqScalarSels = int16EqScalarSels
-	Int16EqNullableScalarSels = int16EqNullableScalarSels
-	Int32Eq = int32Eq
-	Int32EqNullable = int32EqNullable
-	Int32EqSels = int32EqSels
-	Int32EqNullableSels = int32EqNullableSels
-	Int32EqScalar = int32EqScalar
-	Int32EqNullableScalar = int32EqNullableScalar
-	Int32EqScalarSels = int32EqScalarSels
-	Int32EqNullableScalarSels = int32EqNullableScalarSels
-	Int64Eq = int64Eq
-	Int64EqNullable = int64EqNullable
-	Int64EqSels = int64EqSels
-	Int64EqNullableSels = int64EqNullableSels
-	Int64EqScalar = int64EqScalar
-	Int64EqNullableScalar = int64EqNullableScalar
-	Int64EqScalarSels = int64EqScalarSels
-	Int64EqNullableScalarSels = int64EqNullableScalarSels
-	Uint8Eq = uint8Eq
-	Uint8EqNullable = uint8EqNullable
-	Uint8EqSels = uint8EqSels
-	Uint8EqNullableSels = uint8EqNullableSels
-	Uint8EqScalar = uint8EqScalar
-	Uint8EqNullableScalar = uint8EqNullableScalar
-	Uint8EqScalarSels = uint8EqScalarSels
-	Uint8EqNullableScalarSels = uint8EqNullableScalarSels
-	Uint16Eq = uint16Eq
-	Uint16EqNullable = uint16EqNullable
-	Uint16EqSels = uint16EqSels
-	Uint16EqNullableSels = uint16EqNullableSels
-	Uint16EqScalar = uint16EqScalar
-	Uint16EqNullableScalar = uint16EqNullableScalar
-	Uint16EqScalarSels = uint16EqScalarSels
-	Uint16EqNullableScalarSels = uint16EqNullableScalarSels
-	Uint32Eq = uint32Eq
-	Uint32EqNullable = uint32EqNullable
-	Uint32EqSels = uint32EqSels
-	Uint32EqNullableSels = uint32EqNullableSels
-	Uint32EqScalar = uint32EqScalar
-	Uint32EqNullableScalar = uint32EqNullableScalar
-	Uint32EqScalarSels = uint32EqScalarSels
-	Uint32EqNullableScalarSels = uint32EqNullableScalarSels
-	Uint64Eq = uint64Eq
-	Uint64EqNullable = uint64EqNullable
-	Uint64EqSels = uint64EqSels
-	Uint64EqNullableSels = uint64EqNullableSels
-	Uint64EqScalar = uint64EqScalar
-	Uint64EqNullableScalar = uint64EqNullableScalar
-	Uint64EqScalarSels = uint64EqScalarSels
-	Uint64EqNullableScalarSels = uint64EqNullableScalarSels
-	Float32Eq = float32Eq
-	Float32EqNullable = float32EqNullable
-	Float32EqSels = float32EqSels
-	Float32EqNullableSels = float32EqNullableSels
-	Float32EqScalar = float32EqScalar
-	Float32EqNullableScalar = float32EqNullableScalar
-	Float32EqScalarSels = float32EqScalarSels
-	Float32EqNullableScalarSels = float32EqNullableScalarSels
-	Float64Eq = float64Eq
-	Float64EqNullable = float64EqNullable
-	Float64EqSels = float64EqSels
-	Float64EqNullableSels = float64EqNullableSels
-	Float64EqScalar = float64EqScalar
-	Float64EqNullableScalar = float64EqNullableScalar
-	Float64EqScalarSels = float64EqScalarSels
+	Int8Eq                      = numericEq[int8]
+	Int8EqNullable              = numericEqNullable[int8]
+	Int8EqSels                  = numericEqSels[int8]
+	Int8EqNullableSels          = numericEqNullableSels[int8]
+	Int8EqScalar                = numericEqScalar[int8]
+	Int8EqNullableScalar        = numericEqNullableScalar[int8]
+	Int8EqScalarSels            = numericEqScalarSels[int8]
+	Int8EqNullableScalarSels    = numericEqNullableScalarSels[int8]
+	Int16Eq                     = numericEq[int16]
+	Int16EqNullable             = numericEqNullable[int16]
+	Int16EqSels                 = numericEqSels[int16]
+	Int16EqNullableSels         = numericEqNullableSels[int16]
+	Int16EqScalar               = numericEqScalar[int16]
+	Int16EqNullableScalar       = numericEqNullableScalar[int16]
+	Int16EqScalarSels           = numericEqScalarSels[int16]
+	Int16EqNullableScalarSels   = numericEqNullableScalarSels[int16]
+	Int32Eq                     = numericEq[int32]
+	Int32EqNullable             = numericEqNullable[int32]
+	Int32EqSels                 = numericEqSels[int32]
+	Int32EqNullableSels         = numericEqNullableSels[int32]
+	Int32EqScalar               = numericEqScalar[int32]
+	Int32EqNullableScalar       = numericEqNullableScalar[int32]
+	Int32EqScalarSels           = numericEqScalarSels[int32]
+	Int32EqNullableScalarSels   = numericEqNullableScalarSels[int32]
+	Int64Eq                     = numericEq[int64]
+	Int64EqNullable             = numericEqNullable[int64]
+	Int64EqSels                 = numericEqSels[int64]
+	Int64EqNullableSels         = numericEqNullableSels[int64]
+	Int64EqScalar               = numericEqScalar[int64]
+	Int64EqNullableScalar       = numericEqNullableScalar[int64]
+	Int64EqScalarSels           = numericEqScalarSels[int64]
+	Int64EqNullableScalarSels   = numericEqNullableScalarSels[int64]
+	Uint8Eq                     = numericEq[uint8]
+	Uint8EqNullable             = numericEqNullable[uint8]
+	Uint8EqSels                 = numericEqSels[uint8]
+	Uint8EqNullableSels         = numericEqNullableSels[uint8]
+	Uint8EqScalar               = numericEqScalar[uint8]
+	Uint8EqNullableScalar       = numericEqNullableScalar[uint8]
+	Uint8EqScalarSels           = numericEqScalarSels[uint8]
+	Uint8EqNullableScalarSels   = numericEqNullableScalarSels[uint8]
+	Uint16Eq                    = numericEq[uint16]
+	Uint16EqNullable            = numericEqNullable[uint16]
+	Uint16EqSels                = numericEqSels[uint16]
+	Uint16EqNullableSels        = numericEqNullableSels[uint16]
+	Uint16EqScalar              = numericEqScalar[uint16]
+	Uint16EqNullableScalar      = numericEqNullableScalar[uint16]
+	Uint16EqScalarSels          = numericEqScalarSels[uint16]
+	Uint16EqNullableScalarSels  = numericEqNullableScalarSels[uint16]
+	Uint32Eq                    = numericEq[uint32]
+	Uint32EqNullable            = numericEqNullable[uint32]
+	Uint32EqSels                = numericEqSels[uint32]
+	Uint32EqNullableSels        = numericEqNullableSels[uint32]
+	Uint32EqScalar              = numericEqScalar[uint32]
+	Uint32EqNullableScalar      = numericEqNullableScalar[uint32]
+	Uint32EqScalarSels          = numericEqScalarSels[uint32]
+	Uint32EqNullableScalarSels  = numericEqNullableScalarSels[uint32]
+	Uint64Eq                    = numericEq[uint64]
+	Uint64EqNullable            = numericEqNullable[uint64]
+	Uint64EqSels                = numericEqSels[uint64]
+	Uint64EqNullableSels        = numericEqNullableSels[uint64]
+	Uint64EqScalar              = numericEqScalar[uint64]
+	Uint64EqNullableScalar      = numericEqNullableScalar[uint64]
+	Uint64EqScalarSels          = numericEqScalarSels[uint64]
+	Uint64EqNullableScalarSels  = numericEqNullableScalarSels[uint64]
+	Float32Eq                   = numericEq[float32]
+	Float32EqNullable           = numericEqNullable[float32]
+	Float32EqSels               = numericEqSels[float32]
+	Float32EqNullableSels       = numericEqNullableSels[float32]
+	Float32EqScalar             = numericEqScalar[float32]
+	Float32EqNullableScalar     = numericEqNullableScalar[float32]
+	Float32EqScalarSels         = numericEqScalarSels[float32]
+	Float32EqNullableScalarSels = numericEqNullableScalarSels[float32]
+
+	Float64Eq                   = float64Eq
+	Float64EqNullable           = float64EqNullable
+	Float64EqSels               = float64EqSels
+	Float64EqNullableSels       = float64EqNullableSels
+	Float64EqScalar             = float64EqScalar
+	Float64EqNullableScalar     = float64EqNullableScalar
+	Float64EqScalarSels         = float64EqScalarSels
 	Float64EqNullableScalarSels = float64EqNullableScalarSels
-	StrEq = strEq
-	StrEqNullable = strEqNullable
-	StrEqSels = strEqSels
-	StrEqNullableSels = strEqNullableSels
-	StrEqScalar = strEqScalar
-	StrEqNullableScalar = strEqNullableScalar
-	StrEqScalarSels = strEqScalarSels
-	StrEqNullableScalarSels = strEqNullableScalarSels
-	Decimal64Eq = decimal64Eq
-	Decimal64EqNullable = decimal64EqNullable
-	Decimal64EqSels = decimal64EqSels
-	Decimal64EqNullableSels = decimal64EqNullableSels
-	Decimal64EqScalar = decimal64EqScalar
-	Decimal64EqNullableScalar = decimal64EqNullableScalar
-	Decimal64EqScalarSels = decimal64EqScalarSels
-	Decimal64EqNullableScalarSels = decimal64EqNullableScalarSels
-	Decimal128Eq = decimal128Eq
-	Decimal128EqNullable = decimal128EqNullable
-	Decimal128EqSels = decimal128EqSels
-	Decimal128EqNullableSels = decimal128EqNullableSels
-	Decimal128EqScalar = decimal128EqScalar
-	Decimal128EqNullableScalar = decimal128EqNullableScalar
-	Decimal128EqScalarSels = decimal128EqScalarSels
-	Decimal128EqNullableScalarSels = decimal128EqNullableScalarSels
-}
-
-func int8Eq(xs, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8EqNullable(xs, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8EqSels(xs, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8EqNullableSels(xs, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8EqScalar(x int8, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8EqNullableScalar(x int8, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8EqScalarSels(x int8, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8EqNullableScalarSels(x int8, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16Eq(xs, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
 
-func int16EqNullable(xs, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16EqSels(xs, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16EqNullableSels(xs, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16EqScalar(x int16, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16EqNullableScalar(x int16, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16EqScalarSels(x int16, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16EqNullableScalarSels(x int16, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32Eq(xs, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32EqNullable(xs, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
+	StrEq                   = strEq
+	StrEqNullable           = strEqNullable
+	StrEqSels               = strEqSels
+	StrEqNullableSels       = strEqNullableSels
+	StrEqScalar             = strEqScalar
+	StrEqNullableScalar     = strEqNullableScalar
+	StrEqScalarSels         = strEqScalarSels
+	StrEqNullableScalarSels = strEqNullableScalarSels
 
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32EqSels(xs, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32EqNullableSels(xs, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32EqScalar(x int32, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32EqNullableScalar(x int32, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32EqScalarSels(x int32, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32EqNullableScalarSels(x int32, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64Eq(xs, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64EqNullable(xs, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64EqSels(xs, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64EqNullableSels(xs, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64EqScalar(x int64, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64EqNullableScalar(x int64, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64EqScalarSels(x int64, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64EqNullableScalarSels(x int64, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8Eq(xs, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8EqNullable(xs, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8EqSels(xs, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8EqNullableSels(xs, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8EqScalar(x uint8, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8EqNullableScalar(x uint8, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8EqScalarSels(x uint8, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8EqNullableScalarSels(x uint8, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16Eq(xs, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16EqNullable(xs, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16EqSels(xs, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16EqNullableSels(xs, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16EqScalar(x uint16, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16EqNullableScalar(x uint16, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16EqScalarSels(x uint16, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16EqNullableScalarSels(x uint16, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32Eq(xs, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32EqNullable(xs, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32EqSels(xs, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32EqNullableSels(xs, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32EqScalar(x uint32, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32EqNullableScalar(x uint32, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32EqScalarSels(x uint32, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32EqNullableScalarSels(x uint32, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64Eq(xs, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64EqNullable(xs, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64EqSels(xs, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64EqNullableSels(xs, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64EqScalar(x uint64, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64EqNullableScalar(x uint64, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x == y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64EqScalarSels(x uint64, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64EqNullableScalarSels(x uint64, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
+	Decimal64Eq                    = decimal64Eq
+	Decimal64EqNullable            = decimal64EqNullable
+	Decimal64EqSels                = decimal64EqSels
+	Decimal64EqNullableSels        = decimal64EqNullableSels
+	Decimal64EqScalar              = decimal64EqScalar
+	Decimal64EqNullableScalar      = decimal64EqNullableScalar
+	Decimal64EqScalarSels          = decimal64EqScalarSels
+	Decimal64EqNullableScalarSels  = decimal64EqNullableScalarSels
+	Decimal128Eq                   = decimal128Eq
+	Decimal128EqNullable           = decimal128EqNullable
+	Decimal128EqSels               = decimal128EqSels
+	Decimal128EqNullableSels       = decimal128EqNullableSels
+	Decimal128EqScalar             = decimal128EqScalar
+	Decimal128EqNullableScalar     = decimal128EqNullableScalar
+	Decimal128EqScalarSels         = decimal128EqScalarSels
+	Decimal128EqNullableScalarSels = decimal128EqNullableScalarSels
+)
 
-func float32Eq(xs, ys []float32, rs []int64) []int64 {
+func numericEq[T constraints.Integer | constraints.Float](xs, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, x := range xs {
 		if x == ys[i] {
@@ -1191,7 +145,7 @@ func float32Eq(xs, ys []float32, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float32EqNullable(xs, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericEqNullable[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1217,7 +171,7 @@ func float32EqNullable(xs, ys []float32, nulls *roaring.Bitmap, rs []int64) []in
 	return rs[:rsi]
 }
 
-func float32EqSels(xs, ys []float32, rs, sels []int64) []int64 {
+func numericEqSels[T constraints.Integer | constraints.Float](xs, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if xs[sel] == ys[sel] {
@@ -1228,7 +182,7 @@ func float32EqSels(xs, ys []float32, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float32EqNullableSels(xs, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericEqNullableSels[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && xs[sel] == ys[sel] {
@@ -1239,7 +193,7 @@ func float32EqNullableSels(xs, ys []float32, nulls *roaring.Bitmap, rs, sels []i
 	return rs[:rsi]
 }
 
-func float32EqScalar(x float32, ys []float32, rs []int64) []int64 {
+func numericEqScalar[T constraints.Integer | constraints.Float](x T, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, y := range ys {
 		if x == y {
@@ -1250,7 +204,7 @@ func float32EqScalar(x float32, ys []float32, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float32EqNullableScalar(x float32, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericEqNullableScalar[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1276,7 +230,7 @@ func float32EqNullableScalar(x float32, ys []float32, nulls *roaring.Bitmap, rs
 	return rs[:rsi]
 }
 
-func float32EqScalarSels(x float32, ys []float32, rs, sels []int64) []int64 {
+func numericEqScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if x == ys[sel] {
@@ -1287,7 +241,7 @@ func float32EqScalarSels(x float32, ys []float32, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float32EqNullableScalarSels(x float32, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericEqNullableScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && x == ys[sel] {
diff --git a/pkg/vectorize/floor/floor_amd64.go b/pkg/vectorize/floor/floor_amd64.go
index b460dc899cdecb5c95c687debfe4008522f7ffe0..e3abeca3e50b2c7067615ecb2083089e8a627937 100644
--- a/pkg/vectorize/floor/floor_amd64.go
+++ b/pkg/vectorize/floor/floor_amd64.go
@@ -12,9 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:build amd64
-// +build amd64
-
 package floor
 
 import (
diff --git a/pkg/vectorize/ge/ge.go b/pkg/vectorize/ge/ge.go
index 9e3479a08e95687e1a8b14e7d768dffcd63819bf..e2c5749286a977c4ea76ddf67b817ee12aef66f6 100644
--- a/pkg/vectorize/ge/ge.go
+++ b/pkg/vectorize/ge/ge.go
@@ -16,1290 +16,124 @@ package ge
 
 import (
 	"bytes"
-	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"math"
 
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
+
 	roaring "github.com/RoaringBitmap/roaring/roaring64"
 )
 
 var (
-	Int8Ge                      func([]int8, []int8, []int64) []int64
-	Int8GeNullable              func([]int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8GeSels                  func([]int8, []int8, []int64, []int64) []int64
-	Int8GeNullableSels          func([]int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int8GeScalar                func(int8, []int8, []int64) []int64
-	Int8GeNullableScalar        func(int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8GeScalarSels            func(int8, []int8, []int64, []int64) []int64
-	Int8GeNullableScalarSels    func(int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int16Ge                     func([]int16, []int16, []int64) []int64
-	Int16GeNullable             func([]int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16GeSels                 func([]int16, []int16, []int64, []int64) []int64
-	Int16GeNullableSels         func([]int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int16GeScalar               func(int16, []int16, []int64) []int64
-	Int16GeNullableScalar       func(int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16GeScalarSels           func(int16, []int16, []int64, []int64) []int64
-	Int16GeNullableScalarSels   func(int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int32Ge                     func([]int32, []int32, []int64) []int64
-	Int32GeNullable             func([]int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32GeSels                 func([]int32, []int32, []int64, []int64) []int64
-	Int32GeNullableSels         func([]int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int32GeScalar               func(int32, []int32, []int64) []int64
-	Int32GeNullableScalar       func(int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32GeScalarSels           func(int32, []int32, []int64, []int64) []int64
-	Int32GeNullableScalarSels   func(int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int64Ge                     func([]int64, []int64, []int64) []int64
-	Int64GeNullable             func([]int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64GeSels                 func([]int64, []int64, []int64, []int64) []int64
-	Int64GeNullableSels         func([]int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Int64GeScalar               func(int64, []int64, []int64) []int64
-	Int64GeNullableScalar       func(int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64GeScalarSels           func(int64, []int64, []int64, []int64) []int64
-	Int64GeNullableScalarSels   func(int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8Ge                     func([]uint8, []uint8, []int64) []int64
-	Uint8GeNullable             func([]uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8GeSels                 func([]uint8, []uint8, []int64, []int64) []int64
-	Uint8GeNullableSels         func([]uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8GeScalar               func(uint8, []uint8, []int64) []int64
-	Uint8GeNullableScalar       func(uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8GeScalarSels           func(uint8, []uint8, []int64, []int64) []int64
-	Uint8GeNullableScalarSels   func(uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16Ge                    func([]uint16, []uint16, []int64) []int64
-	Uint16GeNullable            func([]uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16GeSels                func([]uint16, []uint16, []int64, []int64) []int64
-	Uint16GeNullableSels        func([]uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16GeScalar              func(uint16, []uint16, []int64) []int64
-	Uint16GeNullableScalar      func(uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16GeScalarSels          func(uint16, []uint16, []int64, []int64) []int64
-	Uint16GeNullableScalarSels  func(uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32Ge                    func([]uint32, []uint32, []int64) []int64
-	Uint32GeNullable            func([]uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32GeSels                func([]uint32, []uint32, []int64, []int64) []int64
-	Uint32GeNullableSels        func([]uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32GeScalar              func(uint32, []uint32, []int64) []int64
-	Uint32GeNullableScalar      func(uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32GeScalarSels          func(uint32, []uint32, []int64, []int64) []int64
-	Uint32GeNullableScalarSels  func(uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64Ge                    func([]uint64, []uint64, []int64) []int64
-	Uint64GeNullable            func([]uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64GeSels                func([]uint64, []uint64, []int64, []int64) []int64
-	Uint64GeNullableSels        func([]uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64GeScalar              func(uint64, []uint64, []int64) []int64
-	Uint64GeNullableScalar      func(uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64GeScalarSels          func(uint64, []uint64, []int64, []int64) []int64
-	Uint64GeNullableScalarSels  func(uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Float32Ge                   func([]float32, []float32, []int64) []int64
-	Float32GeNullable           func([]float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32GeSels               func([]float32, []float32, []int64, []int64) []int64
-	Float32GeNullableSels       func([]float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float32GeScalar             func(float32, []float32, []int64) []int64
-	Float32GeNullableScalar     func(float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32GeScalarSels         func(float32, []float32, []int64, []int64) []int64
-	Float32GeNullableScalarSels func(float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float64Ge                   func([]float64, []float64, []int64) []int64
-	Float64GeNullable           func([]float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64GeSels               func([]float64, []float64, []int64, []int64) []int64
-	Float64GeNullableSels       func([]float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	Float64GeScalar             func(float64, []float64, []int64) []int64
-	Float64GeNullableScalar     func(float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64GeScalarSels         func(float64, []float64, []int64, []int64) []int64
-	Float64GeNullableScalarSels func(float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	StrGe                       func(*types.Bytes, *types.Bytes, []int64) []int64
-	StrGeNullable               func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrGeSels                   func(*types.Bytes, *types.Bytes, []int64, []int64) []int64
-	StrGeNullableSels           func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-	StrGeScalar                 func([]byte, *types.Bytes, []int64) []int64
-	StrGeNullableScalar         func([]byte, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrGeScalarSels             func([]byte, *types.Bytes, []int64, []int64) []int64
-	StrGeNullableScalarSels     func([]byte, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-
-	Decimal64Ge                    func([]types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64GeNullable            func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64GeSels                func([]types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64GeNullableSels        func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal64GeScalar              func(types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64GeNullableScalar      func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64GeScalarSels          func(types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64GeNullableScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128Ge                   func([]types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128GeNullable           func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128GeSels               func([]types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128GeNullableSels       func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128GeScalar             func(types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128GeNullableScalar     func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128GeScalarSels         func(types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128GeNullableScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-)
-
-func init() {
-	Int8Ge = int8Ge
-	Int8GeNullable = int8GeNullable
-	Int8GeSels = int8GeSels
-	Int8GeNullableSels = int8GeNullableSels
-	Int8GeScalar = int8GeScalar
-	Int8GeNullableScalar = int8GeNullableScalar
-	Int8GeScalarSels = int8GeScalarSels
-	Int8GeNullableScalarSels = int8GeNullableScalarSels
-	Int16Ge = int16Ge
-	Int16GeNullable = int16GeNullable
-	Int16GeSels = int16GeSels
-	Int16GeNullableSels = int16GeNullableSels
-	Int16GeScalar = int16GeScalar
-	Int16GeNullableScalar = int16GeNullableScalar
-	Int16GeScalarSels = int16GeScalarSels
-	Int16GeNullableScalarSels = int16GeNullableScalarSels
-	Int32Ge = int32Ge
-	Int32GeNullable = int32GeNullable
-	Int32GeSels = int32GeSels
-	Int32GeNullableSels = int32GeNullableSels
-	Int32GeScalar = int32GeScalar
-	Int32GeNullableScalar = int32GeNullableScalar
-	Int32GeScalarSels = int32GeScalarSels
-	Int32GeNullableScalarSels = int32GeNullableScalarSels
-	Int64Ge = int64Ge
-	Int64GeNullable = int64GeNullable
-	Int64GeSels = int64GeSels
-	Int64GeNullableSels = int64GeNullableSels
-	Int64GeScalar = int64GeScalar
-	Int64GeNullableScalar = int64GeNullableScalar
-	Int64GeScalarSels = int64GeScalarSels
-	Int64GeNullableScalarSels = int64GeNullableScalarSels
-	Uint8Ge = uint8Ge
-	Uint8GeNullable = uint8GeNullable
-	Uint8GeSels = uint8GeSels
-	Uint8GeNullableSels = uint8GeNullableSels
-	Uint8GeScalar = uint8GeScalar
-	Uint8GeNullableScalar = uint8GeNullableScalar
-	Uint8GeScalarSels = uint8GeScalarSels
-	Uint8GeNullableScalarSels = uint8GeNullableScalarSels
-	Uint16Ge = uint16Ge
-	Uint16GeNullable = uint16GeNullable
-	Uint16GeSels = uint16GeSels
-	Uint16GeNullableSels = uint16GeNullableSels
-	Uint16GeScalar = uint16GeScalar
-	Uint16GeNullableScalar = uint16GeNullableScalar
-	Uint16GeScalarSels = uint16GeScalarSels
-	Uint16GeNullableScalarSels = uint16GeNullableScalarSels
-	Uint32Ge = uint32Ge
-	Uint32GeNullable = uint32GeNullable
-	Uint32GeSels = uint32GeSels
-	Uint32GeNullableSels = uint32GeNullableSels
-	Uint32GeScalar = uint32GeScalar
-	Uint32GeNullableScalar = uint32GeNullableScalar
-	Uint32GeScalarSels = uint32GeScalarSels
-	Uint32GeNullableScalarSels = uint32GeNullableScalarSels
-	Uint64Ge = uint64Ge
-	Uint64GeNullable = uint64GeNullable
-	Uint64GeSels = uint64GeSels
-	Uint64GeNullableSels = uint64GeNullableSels
-	Uint64GeScalar = uint64GeScalar
-	Uint64GeNullableScalar = uint64GeNullableScalar
-	Uint64GeScalarSels = uint64GeScalarSels
-	Uint64GeNullableScalarSels = uint64GeNullableScalarSels
-	Float32Ge = float32Ge
-	Float32GeNullable = float32GeNullable
-	Float32GeSels = float32GeSels
-	Float32GeNullableSels = float32GeNullableSels
-	Float32GeScalar = float32GeScalar
-	Float32GeNullableScalar = float32GeNullableScalar
-	Float32GeScalarSels = float32GeScalarSels
-	Float32GeNullableScalarSels = float32GeNullableScalarSels
-	Float64Ge = float64Ge
-	Float64GeNullable = float64GeNullable
-	Float64GeSels = float64GeSels
-	Float64GeNullableSels = float64GeNullableSels
-	Float64GeScalar = float64GeScalar
-	Float64GeNullableScalar = float64GeNullableScalar
-	Float64GeScalarSels = float64GeScalarSels
-	Float64GeNullableScalarSels = float64GeNullableScalarSels
-	StrGe = strGe
-	StrGeNullable = strGeNullable
-	StrGeSels = strGeSels
-	StrGeNullableSels = strGeNullableSels
-	StrGeScalar = strGeScalar
-	StrGeNullableScalar = strGeNullableScalar
-	StrGeScalarSels = strGeScalarSels
+	Int8Ge                      = numericGe[int8]
+	Int8GeNullable              = numericGeNullable[int8]
+	Int8GeSels                  = numericGeSels[int8]
+	Int8GeNullableSels          = numericGeNullableSels[int8]
+	Int8GeScalar                = numericGeScalar[int8]
+	Int8GeNullableScalar        = numericGeNullableScalar[int8]
+	Int8GeScalarSels            = numericGeScalarSels[int8]
+	Int8GeNullableScalarSels    = numericGeNullableScalarSels[int8]
+	Int16Ge                     = numericGe[int16]
+	Int16GeNullable             = numericGeNullable[int16]
+	Int16GeSels                 = numericGeSels[int16]
+	Int16GeNullableSels         = numericGeNullableSels[int16]
+	Int16GeScalar               = numericGeScalar[int16]
+	Int16GeNullableScalar       = numericGeNullableScalar[int16]
+	Int16GeScalarSels           = numericGeScalarSels[int16]
+	Int16GeNullableScalarSels   = numericGeNullableScalarSels[int16]
+	Int32Ge                     = numericGe[int32]
+	Int32GeNullable             = numericGeNullable[int32]
+	Int32GeSels                 = numericGeSels[int32]
+	Int32GeNullableSels         = numericGeNullableSels[int32]
+	Int32GeScalar               = numericGeScalar[int32]
+	Int32GeNullableScalar       = numericGeNullableScalar[int32]
+	Int32GeScalarSels           = numericGeScalarSels[int32]
+	Int32GeNullableScalarSels   = numericGeNullableScalarSels[int32]
+	Int64Ge                     = numericGe[int64]
+	Int64GeNullable             = numericGeNullable[int64]
+	Int64GeSels                 = numericGeSels[int64]
+	Int64GeNullableSels         = numericGeNullableSels[int64]
+	Int64GeScalar               = numericGeScalar[int64]
+	Int64GeNullableScalar       = numericGeNullableScalar[int64]
+	Int64GeScalarSels           = numericGeScalarSels[int64]
+	Int64GeNullableScalarSels   = numericGeNullableScalarSels[int64]
+	Uint8Ge                     = numericGe[uint8]
+	Uint8GeNullable             = numericGeNullable[uint8]
+	Uint8GeSels                 = numericGeSels[uint8]
+	Uint8GeNullableSels         = numericGeNullableSels[uint8]
+	Uint8GeScalar               = numericGeScalar[uint8]
+	Uint8GeNullableScalar       = numericGeNullableScalar[uint8]
+	Uint8GeScalarSels           = numericGeScalarSels[uint8]
+	Uint8GeNullableScalarSels   = numericGeNullableScalarSels[uint8]
+	Uint16Ge                    = numericGe[uint16]
+	Uint16GeNullable            = numericGeNullable[uint16]
+	Uint16GeSels                = numericGeSels[uint16]
+	Uint16GeNullableSels        = numericGeNullableSels[uint16]
+	Uint16GeScalar              = numericGeScalar[uint16]
+	Uint16GeNullableScalar      = numericGeNullableScalar[uint16]
+	Uint16GeScalarSels          = numericGeScalarSels[uint16]
+	Uint16GeNullableScalarSels  = numericGeNullableScalarSels[uint16]
+	Uint32Ge                    = numericGe[uint32]
+	Uint32GeNullable            = numericGeNullable[uint32]
+	Uint32GeSels                = numericGeSels[uint32]
+	Uint32GeNullableSels        = numericGeNullableSels[uint32]
+	Uint32GeScalar              = numericGeScalar[uint32]
+	Uint32GeNullableScalar      = numericGeNullableScalar[uint32]
+	Uint32GeScalarSels          = numericGeScalarSels[uint32]
+	Uint32GeNullableScalarSels  = numericGeNullableScalarSels[uint32]
+	Uint64Ge                    = numericGe[uint64]
+	Uint64GeNullable            = numericGeNullable[uint64]
+	Uint64GeSels                = numericGeSels[uint64]
+	Uint64GeNullableSels        = numericGeNullableSels[uint64]
+	Uint64GeScalar              = numericGeScalar[uint64]
+	Uint64GeNullableScalar      = numericGeNullableScalar[uint64]
+	Uint64GeScalarSels          = numericGeScalarSels[uint64]
+	Uint64GeNullableScalarSels  = numericGeNullableScalarSels[uint64]
+	Float32Ge                   = numericGe[float32]
+	Float32GeNullable           = numericGeNullable[float32]
+	Float32GeSels               = numericGeSels[float32]
+	Float32GeNullableSels       = numericGeNullableSels[float32]
+	Float32GeScalar             = numericGeScalar[float32]
+	Float32GeNullableScalar     = numericGeNullableScalar[float32]
+	Float32GeScalarSels         = numericGeScalarSels[float32]
+	Float32GeNullableScalarSels = numericGeNullableScalarSels[float32]
+	Float64Ge                   = numericGe[float64]
+	Float64GeNullable           = numericGeNullable[float64]
+	Float64GeSels               = numericGeSels[float64]
+	Float64GeNullableSels       = numericGeNullableSels[float64]
+	Float64GeScalar             = numericGeScalar[float64]
+	Float64GeNullableScalar     = numericGeNullableScalar[float64]
+	Float64GeScalarSels         = numericGeScalarSels[float64]
+	Float64GeNullableScalarSels = numericGeNullableScalarSels[float64]
+
+	StrGe                   = strGe
+	StrGeNullable           = strGeNullable
+	StrGeSels               = strGeSels
+	StrGeNullableSels       = strGeNullableSels
+	StrGeScalar             = strGeScalar
+	StrGeNullableScalar     = strGeNullableScalar
+	StrGeScalarSels         = strGeScalarSels
 	StrGeNullableScalarSels = strGeNullableScalarSels
-	Decimal64Ge = decimal64Ge
-	Decimal64GeNullable = decimal64GeNullable
-	Decimal64GeSels = decimal64GeSels
-	Decimal64GeNullableSels = decimal64GeNullableSels
-	Decimal64GeScalar = decimal64GeScalar
-	Decimal64GeNullableScalar = decimal64GeNullableScalar
-	Decimal64GeScalarSels = decimal64GeScalarSels
-	Decimal64GeNullableScalarSels = decimal64GeNullableScalarSels
-	Decimal128Ge = decimal128Ge
-	Decimal128GeNullable = decimal128GeNullable
-	Decimal128GeSels = decimal128GeSels
-	Decimal128GeNullableSels = decimal128GeNullableSels
-	Decimal128GeScalar = decimal128GeScalar
-	Decimal128GeNullableScalar = decimal128GeNullableScalar
-	Decimal128GeScalarSels = decimal128GeScalarSels
-	Decimal128GeNullableScalarSels = decimal128GeNullableScalarSels
-}
-
-func int8Ge(xs, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GeNullable(xs, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GeSels(xs, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GeNullableSels(xs, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GeScalar(x int8, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GeNullableScalar(x int8, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GeScalarSels(x int8, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GeNullableScalarSels(x int8, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16Ge(xs, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
 
-func int16GeNullable(xs, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GeSels(xs, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GeNullableSels(xs, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GeScalar(x int16, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GeNullableScalar(x int16, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GeScalarSels(x int16, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GeNullableScalarSels(x int16, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32Ge(xs, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GeNullable(xs, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GeSels(xs, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GeNullableSels(xs, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GeScalar(x int32, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GeNullableScalar(x int32, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GeScalarSels(x int32, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GeNullableScalarSels(x int32, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64Ge(xs, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GeNullable(xs, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GeSels(xs, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GeNullableSels(xs, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GeScalar(x int64, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GeNullableScalar(x int64, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GeScalarSels(x int64, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GeNullableScalarSels(x int64, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8Ge(xs, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GeNullable(xs, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GeSels(xs, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GeNullableSels(xs, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GeScalar(x uint8, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GeNullableScalar(x uint8, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GeScalarSels(x uint8, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GeNullableScalarSels(x uint8, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16Ge(xs, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GeNullable(xs, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GeSels(xs, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GeNullableSels(xs, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GeScalar(x uint16, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GeNullableScalar(x uint16, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GeScalarSels(x uint16, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GeNullableScalarSels(x uint16, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32Ge(xs, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GeNullable(xs, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GeSels(xs, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GeNullableSels(xs, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GeScalar(x uint32, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GeNullableScalar(x uint32, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GeScalarSels(x uint32, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GeNullableScalarSels(x uint32, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64Ge(xs, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GeNullable(xs, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GeSels(xs, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GeNullableSels(xs, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GeScalar(x uint64, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GeNullableScalar(x uint64, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GeScalarSels(x uint64, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GeNullableScalarSels(x uint64, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32Ge(xs, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GeNullable(xs, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GeSels(xs, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GeNullableSels(xs, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GeScalar(x float32, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GeNullableScalar(x float32, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x >= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GeScalarSels(x float32, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GeNullableScalarSels(x float32, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
+	Decimal64Ge                    = decimal64Ge
+	Decimal64GeNullable            = decimal64GeNullable
+	Decimal64GeSels                = decimal64GeSels
+	Decimal64GeNullableSels        = decimal64GeNullableSels
+	Decimal64GeScalar              = decimal64GeScalar
+	Decimal64GeNullableScalar      = decimal64GeNullableScalar
+	Decimal64GeScalarSels          = decimal64GeScalarSels
+	Decimal64GeNullableScalarSels  = decimal64GeNullableScalarSels
+	Decimal128Ge                   = decimal128Ge
+	Decimal128GeNullable           = decimal128GeNullable
+	Decimal128GeSels               = decimal128GeSels
+	Decimal128GeNullableSels       = decimal128GeNullableSels
+	Decimal128GeScalar             = decimal128GeScalar
+	Decimal128GeNullableScalar     = decimal128GeNullableScalar
+	Decimal128GeScalarSels         = decimal128GeScalarSels
+	Decimal128GeNullableScalarSels = decimal128GeNullableScalarSels
+)
 
-func float64Ge(xs, ys []float64, rs []int64) []int64 {
+func numericGe[T constraints.Integer | constraints.Float](xs, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, x := range xs {
 		if x >= ys[i] {
@@ -1310,7 +144,7 @@ func float64Ge(xs, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GeNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericGeNullable[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1336,7 +170,7 @@ func float64GeNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []in
 	return rs[:rsi]
 }
 
-func float64GeSels(xs, ys []float64, rs, sels []int64) []int64 {
+func numericGeSels[T constraints.Integer | constraints.Float](xs, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if xs[sel] >= ys[sel] {
@@ -1347,7 +181,7 @@ func float64GeSels(xs, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GeNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericGeNullableSels[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && xs[sel] >= ys[sel] {
@@ -1358,7 +192,7 @@ func float64GeNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []i
 	return rs[:rsi]
 }
 
-func float64GeScalar(x float64, ys []float64, rs []int64) []int64 {
+func numericGeScalar[T constraints.Integer | constraints.Float](x T, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, y := range ys {
 		if x >= y {
@@ -1369,7 +203,7 @@ func float64GeScalar(x float64, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GeNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericGeNullableScalar[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1395,7 +229,7 @@ func float64GeNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs
 	return rs[:rsi]
 }
 
-func float64GeScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
+func numericGeScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if x >= ys[sel] {
@@ -1406,7 +240,7 @@ func float64GeScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GeNullableScalarSels(x float64, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericGeNullableScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && x >= ys[sel] {
diff --git a/pkg/vectorize/gt/gt.go b/pkg/vectorize/gt/gt.go
index 9ba0bc6f127a5ae533c926bd7f4d644528fb414d..a12199c4910fe5350a1a1bb0637f6d3ee50785c3 100644
--- a/pkg/vectorize/gt/gt.go
+++ b/pkg/vectorize/gt/gt.go
@@ -16,1289 +16,123 @@ package gt
 
 import (
 	"bytes"
+	"math"
+
 	roaring "github.com/RoaringBitmap/roaring/roaring64"
 	"github.com/matrixorigin/matrixone/pkg/container/types"
-	"math"
+	"golang.org/x/exp/constraints"
 )
 
 var (
-	Int8Gt                      func([]int8, []int8, []int64) []int64
-	Int8GtNullable              func([]int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8GtSels                  func([]int8, []int8, []int64, []int64) []int64
-	Int8GtNullableSels          func([]int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int8GtScalar                func(int8, []int8, []int64) []int64
-	Int8GtNullableScalar        func(int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8GtScalarSels            func(int8, []int8, []int64, []int64) []int64
-	Int8GtNullableScalarSels    func(int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int16Gt                     func([]int16, []int16, []int64) []int64
-	Int16GtNullable             func([]int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16GtSels                 func([]int16, []int16, []int64, []int64) []int64
-	Int16GtNullableSels         func([]int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int16GtScalar               func(int16, []int16, []int64) []int64
-	Int16GtNullableScalar       func(int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16GtScalarSels           func(int16, []int16, []int64, []int64) []int64
-	Int16GtNullableScalarSels   func(int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int32Gt                     func([]int32, []int32, []int64) []int64
-	Int32GtNullable             func([]int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32GtSels                 func([]int32, []int32, []int64, []int64) []int64
-	Int32GtNullableSels         func([]int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int32GtScalar               func(int32, []int32, []int64) []int64
-	Int32GtNullableScalar       func(int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32GtScalarSels           func(int32, []int32, []int64, []int64) []int64
-	Int32GtNullableScalarSels   func(int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int64Gt                     func([]int64, []int64, []int64) []int64
-	Int64GtNullable             func([]int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64GtSels                 func([]int64, []int64, []int64, []int64) []int64
-	Int64GtNullableSels         func([]int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Int64GtScalar               func(int64, []int64, []int64) []int64
-	Int64GtNullableScalar       func(int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64GtScalarSels           func(int64, []int64, []int64, []int64) []int64
-	Int64GtNullableScalarSels   func(int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8Gt                     func([]uint8, []uint8, []int64) []int64
-	Uint8GtNullable             func([]uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8GtSels                 func([]uint8, []uint8, []int64, []int64) []int64
-	Uint8GtNullableSels         func([]uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8GtScalar               func(uint8, []uint8, []int64) []int64
-	Uint8GtNullableScalar       func(uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8GtScalarSels           func(uint8, []uint8, []int64, []int64) []int64
-	Uint8GtNullableScalarSels   func(uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16Gt                    func([]uint16, []uint16, []int64) []int64
-	Uint16GtNullable            func([]uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16GtSels                func([]uint16, []uint16, []int64, []int64) []int64
-	Uint16GtNullableSels        func([]uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16GtScalar              func(uint16, []uint16, []int64) []int64
-	Uint16GtNullableScalar      func(uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16GtScalarSels          func(uint16, []uint16, []int64, []int64) []int64
-	Uint16GtNullableScalarSels  func(uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32Gt                    func([]uint32, []uint32, []int64) []int64
-	Uint32GtNullable            func([]uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32GtSels                func([]uint32, []uint32, []int64, []int64) []int64
-	Uint32GtNullableSels        func([]uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32GtScalar              func(uint32, []uint32, []int64) []int64
-	Uint32GtNullableScalar      func(uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32GtScalarSels          func(uint32, []uint32, []int64, []int64) []int64
-	Uint32GtNullableScalarSels  func(uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64Gt                    func([]uint64, []uint64, []int64) []int64
-	Uint64GtNullable            func([]uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64GtSels                func([]uint64, []uint64, []int64, []int64) []int64
-	Uint64GtNullableSels        func([]uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64GtScalar              func(uint64, []uint64, []int64) []int64
-	Uint64GtNullableScalar      func(uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64GtScalarSels          func(uint64, []uint64, []int64, []int64) []int64
-	Uint64GtNullableScalarSels  func(uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Float32Gt                   func([]float32, []float32, []int64) []int64
-	Float32GtNullable           func([]float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32GtSels               func([]float32, []float32, []int64, []int64) []int64
-	Float32GtNullableSels       func([]float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float32GtScalar             func(float32, []float32, []int64) []int64
-	Float32GtNullableScalar     func(float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32GtScalarSels         func(float32, []float32, []int64, []int64) []int64
-	Float32GtNullableScalarSels func(float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float64Gt                   func([]float64, []float64, []int64) []int64
-	Float64GtNullable           func([]float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64GtSels               func([]float64, []float64, []int64, []int64) []int64
-	Float64GtNullableSels       func([]float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	Float64GtScalar             func(float64, []float64, []int64) []int64
-	Float64GtNullableScalar     func(float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64GtScalarSels         func(float64, []float64, []int64, []int64) []int64
-	Float64GtNullableScalarSels func(float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	StrGt                       func(*types.Bytes, *types.Bytes, []int64) []int64
-	StrGtNullable               func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrGtSels                   func(*types.Bytes, *types.Bytes, []int64, []int64) []int64
-	StrGtNullableSels           func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-	StrGtScalar                 func([]byte, *types.Bytes, []int64) []int64
-	StrGtNullableScalar         func([]byte, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrGtScalarSels             func([]byte, *types.Bytes, []int64, []int64) []int64
-	StrGtNullableScalarSels     func([]byte, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-
-	Decimal64Gt                    func([]types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64GtNullable            func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64GtSels                func([]types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64GtNullableSels        func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal64GtScalar              func(types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64GtNullableScalar      func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64GtScalarSels          func(types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64GtNullableScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128Gt                   func([]types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128GtNullable           func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128GtSels               func([]types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128GtNullableSels       func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128GtScalar             func(types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128GtNullableScalar     func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128GtScalarSels         func(types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128GtNullableScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-)
-
-func init() {
-	Int8Gt = int8Gt
-	Int8GtNullable = int8GtNullable
-	Int8GtSels = int8GtSels
-	Int8GtNullableSels = int8GtNullableSels
-	Int8GtScalar = int8GtScalar
-	Int8GtNullableScalar = int8GtNullableScalar
-	Int8GtScalarSels = int8GtScalarSels
-	Int8GtNullableScalarSels = int8GtNullableScalarSels
-	Int16Gt = int16Gt
-	Int16GtNullable = int16GtNullable
-	Int16GtSels = int16GtSels
-	Int16GtNullableSels = int16GtNullableSels
-	Int16GtScalar = int16GtScalar
-	Int16GtNullableScalar = int16GtNullableScalar
-	Int16GtScalarSels = int16GtScalarSels
-	Int16GtNullableScalarSels = int16GtNullableScalarSels
-	Int32Gt = int32Gt
-	Int32GtNullable = int32GtNullable
-	Int32GtSels = int32GtSels
-	Int32GtNullableSels = int32GtNullableSels
-	Int32GtScalar = int32GtScalar
-	Int32GtNullableScalar = int32GtNullableScalar
-	Int32GtScalarSels = int32GtScalarSels
-	Int32GtNullableScalarSels = int32GtNullableScalarSels
-	Int64Gt = int64Gt
-	Int64GtNullable = int64GtNullable
-	Int64GtSels = int64GtSels
-	Int64GtNullableSels = int64GtNullableSels
-	Int64GtScalar = int64GtScalar
-	Int64GtNullableScalar = int64GtNullableScalar
-	Int64GtScalarSels = int64GtScalarSels
-	Int64GtNullableScalarSels = int64GtNullableScalarSels
-	Uint8Gt = uint8Gt
-	Uint8GtNullable = uint8GtNullable
-	Uint8GtSels = uint8GtSels
-	Uint8GtNullableSels = uint8GtNullableSels
-	Uint8GtScalar = uint8GtScalar
-	Uint8GtNullableScalar = uint8GtNullableScalar
-	Uint8GtScalarSels = uint8GtScalarSels
-	Uint8GtNullableScalarSels = uint8GtNullableScalarSels
-	Uint16Gt = uint16Gt
-	Uint16GtNullable = uint16GtNullable
-	Uint16GtSels = uint16GtSels
-	Uint16GtNullableSels = uint16GtNullableSels
-	Uint16GtScalar = uint16GtScalar
-	Uint16GtNullableScalar = uint16GtNullableScalar
-	Uint16GtScalarSels = uint16GtScalarSels
-	Uint16GtNullableScalarSels = uint16GtNullableScalarSels
-	Uint32Gt = uint32Gt
-	Uint32GtNullable = uint32GtNullable
-	Uint32GtSels = uint32GtSels
-	Uint32GtNullableSels = uint32GtNullableSels
-	Uint32GtScalar = uint32GtScalar
-	Uint32GtNullableScalar = uint32GtNullableScalar
-	Uint32GtScalarSels = uint32GtScalarSels
-	Uint32GtNullableScalarSels = uint32GtNullableScalarSels
-	Uint64Gt = uint64Gt
-	Uint64GtNullable = uint64GtNullable
-	Uint64GtSels = uint64GtSels
-	Uint64GtNullableSels = uint64GtNullableSels
-	Uint64GtScalar = uint64GtScalar
-	Uint64GtNullableScalar = uint64GtNullableScalar
-	Uint64GtScalarSels = uint64GtScalarSels
-	Uint64GtNullableScalarSels = uint64GtNullableScalarSels
-	Float32Gt = float32Gt
-	Float32GtNullable = float32GtNullable
-	Float32GtSels = float32GtSels
-	Float32GtNullableSels = float32GtNullableSels
-	Float32GtScalar = float32GtScalar
-	Float32GtNullableScalar = float32GtNullableScalar
-	Float32GtScalarSels = float32GtScalarSels
-	Float32GtNullableScalarSels = float32GtNullableScalarSels
-	Float64Gt = float64Gt
-	Float64GtNullable = float64GtNullable
-	Float64GtSels = float64GtSels
-	Float64GtNullableSels = float64GtNullableSels
-	Float64GtScalar = float64GtScalar
-	Float64GtNullableScalar = float64GtNullableScalar
-	Float64GtScalarSels = float64GtScalarSels
-	Float64GtNullableScalarSels = float64GtNullableScalarSels
-	StrGt = strGt
-	StrGtNullable = strGtNullable
-	StrGtSels = strGtSels
-	StrGtNullableSels = strGtNullableSels
-	StrGtScalar = strGtScalar
-	StrGtNullableScalar = strGtNullableScalar
-	StrGtScalarSels = strGtScalarSels
+	Int8Gt                      = numericGt[int8]
+	Int8GtNullable              = numericGtNullable[int8]
+	Int8GtSels                  = numericGtSels[int8]
+	Int8GtNullableSels          = numericGtNullableSels[int8]
+	Int8GtScalar                = numericGtScalar[int8]
+	Int8GtNullableScalar        = numericGtNullableScalar[int8]
+	Int8GtScalarSels            = numericGtScalarSels[int8]
+	Int8GtNullableScalarSels    = numericGtNullableScalarSels[int8]
+	Int16Gt                     = numericGt[int16]
+	Int16GtNullable             = numericGtNullable[int16]
+	Int16GtSels                 = numericGtSels[int16]
+	Int16GtNullableSels         = numericGtNullableSels[int16]
+	Int16GtScalar               = numericGtScalar[int16]
+	Int16GtNullableScalar       = numericGtNullableScalar[int16]
+	Int16GtScalarSels           = numericGtScalarSels[int16]
+	Int16GtNullableScalarSels   = numericGtNullableScalarSels[int16]
+	Int32Gt                     = numericGt[int32]
+	Int32GtNullable             = numericGtNullable[int32]
+	Int32GtSels                 = numericGtSels[int32]
+	Int32GtNullableSels         = numericGtNullableSels[int32]
+	Int32GtScalar               = numericGtScalar[int32]
+	Int32GtNullableScalar       = numericGtNullableScalar[int32]
+	Int32GtScalarSels           = numericGtScalarSels[int32]
+	Int32GtNullableScalarSels   = numericGtNullableScalarSels[int32]
+	Int64Gt                     = numericGt[int64]
+	Int64GtNullable             = numericGtNullable[int64]
+	Int64GtSels                 = numericGtSels[int64]
+	Int64GtNullableSels         = numericGtNullableSels[int64]
+	Int64GtScalar               = numericGtScalar[int64]
+	Int64GtNullableScalar       = numericGtNullableScalar[int64]
+	Int64GtScalarSels           = numericGtScalarSels[int64]
+	Int64GtNullableScalarSels   = numericGtNullableScalarSels[int64]
+	Uint8Gt                     = numericGt[uint8]
+	Uint8GtNullable             = numericGtNullable[uint8]
+	Uint8GtSels                 = numericGtSels[uint8]
+	Uint8GtNullableSels         = numericGtNullableSels[uint8]
+	Uint8GtScalar               = numericGtScalar[uint8]
+	Uint8GtNullableScalar       = numericGtNullableScalar[uint8]
+	Uint8GtScalarSels           = numericGtScalarSels[uint8]
+	Uint8GtNullableScalarSels   = numericGtNullableScalarSels[uint8]
+	Uint16Gt                    = numericGt[uint16]
+	Uint16GtNullable            = numericGtNullable[uint16]
+	Uint16GtSels                = numericGtSels[uint16]
+	Uint16GtNullableSels        = numericGtNullableSels[uint16]
+	Uint16GtScalar              = numericGtScalar[uint16]
+	Uint16GtNullableScalar      = numericGtNullableScalar[uint16]
+	Uint16GtScalarSels          = numericGtScalarSels[uint16]
+	Uint16GtNullableScalarSels  = numericGtNullableScalarSels[uint16]
+	Uint32Gt                    = numericGt[uint32]
+	Uint32GtNullable            = numericGtNullable[uint32]
+	Uint32GtSels                = numericGtSels[uint32]
+	Uint32GtNullableSels        = numericGtNullableSels[uint32]
+	Uint32GtScalar              = numericGtScalar[uint32]
+	Uint32GtNullableScalar      = numericGtNullableScalar[uint32]
+	Uint32GtScalarSels          = numericGtScalarSels[uint32]
+	Uint32GtNullableScalarSels  = numericGtNullableScalarSels[uint32]
+	Uint64Gt                    = numericGt[uint64]
+	Uint64GtNullable            = numericGtNullable[uint64]
+	Uint64GtSels                = numericGtSels[uint64]
+	Uint64GtNullableSels        = numericGtNullableSels[uint64]
+	Uint64GtScalar              = numericGtScalar[uint64]
+	Uint64GtNullableScalar      = numericGtNullableScalar[uint64]
+	Uint64GtScalarSels          = numericGtScalarSels[uint64]
+	Uint64GtNullableScalarSels  = numericGtNullableScalarSels[uint64]
+	Float32Gt                   = numericGt[float32]
+	Float32GtNullable           = numericGtNullable[float32]
+	Float32GtSels               = numericGtSels[float32]
+	Float32GtNullableSels       = numericGtNullableSels[float32]
+	Float32GtScalar             = numericGtScalar[float32]
+	Float32GtNullableScalar     = numericGtNullableScalar[float32]
+	Float32GtScalarSels         = numericGtScalarSels[float32]
+	Float32GtNullableScalarSels = numericGtNullableScalarSels[float32]
+	Float64Gt                   = numericGt[float64]
+	Float64GtNullable           = numericGtNullable[float64]
+	Float64GtSels               = numericGtSels[float64]
+	Float64GtNullableSels       = numericGtNullableSels[float64]
+	Float64GtScalar             = numericGtScalar[float64]
+	Float64GtNullableScalar     = numericGtNullableScalar[float64]
+	Float64GtScalarSels         = numericGtScalarSels[float64]
+	Float64GtNullableScalarSels = numericGtNullableScalarSels[float64]
+
+	StrGt                   = strGt
+	StrGtNullable           = strGtNullable
+	StrGtSels               = strGtSels
+	StrGtNullableSels       = strGtNullableSels
+	StrGtScalar             = strGtScalar
+	StrGtNullableScalar     = strGtNullableScalar
+	StrGtScalarSels         = strGtScalarSels
 	StrGtNullableScalarSels = strGtNullableScalarSels
-	Decimal64Gt = decimal64Gt
-	Decimal64GtNullable = decimal64GtNullable
-	Decimal64GtSels = decimal64GtSels
-	Decimal64GtNullableSels = decimal64GtNullableSels
-	Decimal64GtScalar = decimal64GtScalar
-	Decimal64GtNullableScalar = decimal64GtNullableScalar
-	Decimal64GtScalarSels = decimal64GtScalarSels
-	Decimal64GtNullableScalarSels = decimal64GtNullableScalarSels
-	Decimal128Gt = decimal128Gt
-	Decimal128GtNullable = decimal128GtNullable
-	Decimal128GtSels = decimal128GtSels
-	Decimal128GtNullableSels = decimal128GtNullableSels
-	Decimal128GtScalar = decimal128GtScalar
-	Decimal128GtNullableScalar = decimal128GtNullableScalar
-	Decimal128GtScalarSels = decimal128GtScalarSels
-	Decimal128GtNullableScalarSels = decimal128GtNullableScalarSels
-}
-
-func int8Gt(xs, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GtNullable(xs, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GtSels(xs, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GtNullableSels(xs, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GtScalar(x int8, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GtNullableScalar(x int8, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GtScalarSels(x int8, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8GtNullableScalarSels(x int8, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16Gt(xs, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
 
-func int16GtNullable(xs, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GtSels(xs, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GtNullableSels(xs, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GtScalar(x int16, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GtNullableScalar(x int16, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GtScalarSels(x int16, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16GtNullableScalarSels(x int16, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32Gt(xs, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GtNullable(xs, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GtSels(xs, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GtNullableSels(xs, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GtScalar(x int32, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GtNullableScalar(x int32, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GtScalarSels(x int32, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32GtNullableScalarSels(x int32, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64Gt(xs, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GtNullable(xs, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GtSels(xs, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GtNullableSels(xs, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GtScalar(x int64, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GtNullableScalar(x int64, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GtScalarSels(x int64, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64GtNullableScalarSels(x int64, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8Gt(xs, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GtNullable(xs, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GtSels(xs, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GtNullableSels(xs, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GtScalar(x uint8, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GtNullableScalar(x uint8, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GtScalarSels(x uint8, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8GtNullableScalarSels(x uint8, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16Gt(xs, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GtNullable(xs, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GtSels(xs, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GtNullableSels(xs, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GtScalar(x uint16, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GtNullableScalar(x uint16, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GtScalarSels(x uint16, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16GtNullableScalarSels(x uint16, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32Gt(xs, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GtNullable(xs, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GtSels(xs, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GtNullableSels(xs, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GtScalar(x uint32, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GtNullableScalar(x uint32, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GtScalarSels(x uint32, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32GtNullableScalarSels(x uint32, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64Gt(xs, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GtNullable(xs, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GtSels(xs, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GtNullableSels(xs, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GtScalar(x uint64, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GtNullableScalar(x uint64, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GtScalarSels(x uint64, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64GtNullableScalarSels(x uint64, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32Gt(xs, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GtNullable(xs, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GtSels(xs, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GtNullableSels(xs, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GtScalar(x float32, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GtNullableScalar(x float32, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x > y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GtScalarSels(x float32, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32GtNullableScalarSels(x float32, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
+	Decimal64Gt                    = decimal64Gt
+	Decimal64GtNullable            = decimal64GtNullable
+	Decimal64GtSels                = decimal64GtSels
+	Decimal64GtNullableSels        = decimal64GtNullableSels
+	Decimal64GtScalar              = decimal64GtScalar
+	Decimal64GtNullableScalar      = decimal64GtNullableScalar
+	Decimal64GtScalarSels          = decimal64GtScalarSels
+	Decimal64GtNullableScalarSels  = decimal64GtNullableScalarSels
+	Decimal128Gt                   = decimal128Gt
+	Decimal128GtNullable           = decimal128GtNullable
+	Decimal128GtSels               = decimal128GtSels
+	Decimal128GtNullableSels       = decimal128GtNullableSels
+	Decimal128GtScalar             = decimal128GtScalar
+	Decimal128GtNullableScalar     = decimal128GtNullableScalar
+	Decimal128GtScalarSels         = decimal128GtScalarSels
+	Decimal128GtNullableScalarSels = decimal128GtNullableScalarSels
+)
 
-func float64Gt(xs, ys []float64, rs []int64) []int64 {
+func numericGt[T constraints.Integer | constraints.Float](xs, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, x := range xs {
 		if x > ys[i] {
@@ -1309,7 +143,7 @@ func float64Gt(xs, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GtNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericGtNullable[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1335,7 +169,7 @@ func float64GtNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []in
 	return rs[:rsi]
 }
 
-func float64GtSels(xs, ys []float64, rs, sels []int64) []int64 {
+func numericGtSels[T constraints.Integer | constraints.Float](xs, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if xs[sel] > ys[sel] {
@@ -1346,7 +180,7 @@ func float64GtSels(xs, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GtNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericGtNullableSels[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && xs[sel] > ys[sel] {
@@ -1357,7 +191,7 @@ func float64GtNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []i
 	return rs[:rsi]
 }
 
-func float64GtScalar(x float64, ys []float64, rs []int64) []int64 {
+func numericGtScalar[T constraints.Integer | constraints.Float](x T, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, y := range ys {
 		if x > y {
@@ -1368,7 +202,7 @@ func float64GtScalar(x float64, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GtNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericGtNullableScalar[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1394,7 +228,7 @@ func float64GtNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs
 	return rs[:rsi]
 }
 
-func float64GtScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
+func numericGtScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if x > ys[sel] {
@@ -1405,7 +239,7 @@ func float64GtScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64GtNullableScalarSels(x float64, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericGtNullableScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && x > ys[sel] {
diff --git a/pkg/vectorize/le/le.go b/pkg/vectorize/le/le.go
index f9516f6a83e099db27fca39083669eead1156bfb..0a945d14d2c11581b3d4d318327c0ca3c913f68c 100644
--- a/pkg/vectorize/le/le.go
+++ b/pkg/vectorize/le/le.go
@@ -16,1290 +16,124 @@ package le
 
 import (
 	"bytes"
-	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"math"
 
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
+
 	roaring "github.com/RoaringBitmap/roaring/roaring64"
 )
 
 var (
-	Int8Le                      func([]int8, []int8, []int64) []int64
-	Int8LeNullable              func([]int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8LeSels                  func([]int8, []int8, []int64, []int64) []int64
-	Int8LeNullableSels          func([]int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int8LeScalar                func(int8, []int8, []int64) []int64
-	Int8LeNullableScalar        func(int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8LeScalarSels            func(int8, []int8, []int64, []int64) []int64
-	Int8LeNullableScalarSels    func(int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int16Le                     func([]int16, []int16, []int64) []int64
-	Int16LeNullable             func([]int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16LeSels                 func([]int16, []int16, []int64, []int64) []int64
-	Int16LeNullableSels         func([]int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int16LeScalar               func(int16, []int16, []int64) []int64
-	Int16LeNullableScalar       func(int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16LeScalarSels           func(int16, []int16, []int64, []int64) []int64
-	Int16LeNullableScalarSels   func(int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int32Le                     func([]int32, []int32, []int64) []int64
-	Int32LeNullable             func([]int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32LeSels                 func([]int32, []int32, []int64, []int64) []int64
-	Int32LeNullableSels         func([]int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int32LeScalar               func(int32, []int32, []int64) []int64
-	Int32LeNullableScalar       func(int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32LeScalarSels           func(int32, []int32, []int64, []int64) []int64
-	Int32LeNullableScalarSels   func(int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int64Le                     func([]int64, []int64, []int64) []int64
-	Int64LeNullable             func([]int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64LeSels                 func([]int64, []int64, []int64, []int64) []int64
-	Int64LeNullableSels         func([]int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Int64LeScalar               func(int64, []int64, []int64) []int64
-	Int64LeNullableScalar       func(int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64LeScalarSels           func(int64, []int64, []int64, []int64) []int64
-	Int64LeNullableScalarSels   func(int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8Le                     func([]uint8, []uint8, []int64) []int64
-	Uint8LeNullable             func([]uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8LeSels                 func([]uint8, []uint8, []int64, []int64) []int64
-	Uint8LeNullableSels         func([]uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8LeScalar               func(uint8, []uint8, []int64) []int64
-	Uint8LeNullableScalar       func(uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8LeScalarSels           func(uint8, []uint8, []int64, []int64) []int64
-	Uint8LeNullableScalarSels   func(uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16Le                    func([]uint16, []uint16, []int64) []int64
-	Uint16LeNullable            func([]uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16LeSels                func([]uint16, []uint16, []int64, []int64) []int64
-	Uint16LeNullableSels        func([]uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16LeScalar              func(uint16, []uint16, []int64) []int64
-	Uint16LeNullableScalar      func(uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16LeScalarSels          func(uint16, []uint16, []int64, []int64) []int64
-	Uint16LeNullableScalarSels  func(uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32Le                    func([]uint32, []uint32, []int64) []int64
-	Uint32LeNullable            func([]uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32LeSels                func([]uint32, []uint32, []int64, []int64) []int64
-	Uint32LeNullableSels        func([]uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32LeScalar              func(uint32, []uint32, []int64) []int64
-	Uint32LeNullableScalar      func(uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32LeScalarSels          func(uint32, []uint32, []int64, []int64) []int64
-	Uint32LeNullableScalarSels  func(uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64Le                    func([]uint64, []uint64, []int64) []int64
-	Uint64LeNullable            func([]uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64LeSels                func([]uint64, []uint64, []int64, []int64) []int64
-	Uint64LeNullableSels        func([]uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64LeScalar              func(uint64, []uint64, []int64) []int64
-	Uint64LeNullableScalar      func(uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64LeScalarSels          func(uint64, []uint64, []int64, []int64) []int64
-	Uint64LeNullableScalarSels  func(uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Float32Le                   func([]float32, []float32, []int64) []int64
-	Float32LeNullable           func([]float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32LeSels               func([]float32, []float32, []int64, []int64) []int64
-	Float32LeNullableSels       func([]float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float32LeScalar             func(float32, []float32, []int64) []int64
-	Float32LeNullableScalar     func(float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32LeScalarSels         func(float32, []float32, []int64, []int64) []int64
-	Float32LeNullableScalarSels func(float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float64Le                   func([]float64, []float64, []int64) []int64
-	Float64LeNullable           func([]float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64LeSels               func([]float64, []float64, []int64, []int64) []int64
-	Float64LeNullableSels       func([]float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	Float64LeScalar             func(float64, []float64, []int64) []int64
-	Float64LeNullableScalar     func(float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64LeScalarSels         func(float64, []float64, []int64, []int64) []int64
-	Float64LeNullableScalarSels func(float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	StrLe                       func(*types.Bytes, *types.Bytes, []int64) []int64
-	StrLeNullable               func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrLeSels                   func(*types.Bytes, *types.Bytes, []int64, []int64) []int64
-	StrLeNullableSels           func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-	StrLeScalar                 func([]byte, *types.Bytes, []int64) []int64
-	StrLeNullableScalar         func([]byte, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrLeScalarSels             func([]byte, *types.Bytes, []int64, []int64) []int64
-	StrLeNullableScalarSels     func([]byte, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-
-	Decimal64Le                    func([]types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64LeNullable            func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64LeSels                func([]types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64LeNullableSels        func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal64LeScalar              func(types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64LeNullableScalar      func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64LeScalarSels          func(types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64LeNullableScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128Le                   func([]types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128LeNullable           func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128LeSels               func([]types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128LeNullableSels       func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128LeScalar             func(types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128LeNullableScalar     func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128LeScalarSels         func(types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128LeNullableScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-)
-
-func init() {
-	Int8Le = int8Le
-	Int8LeNullable = int8LeNullable
-	Int8LeSels = int8LeSels
-	Int8LeNullableSels = int8LeNullableSels
-	Int8LeScalar = int8LeScalar
-	Int8LeNullableScalar = int8LeNullableScalar
-	Int8LeScalarSels = int8LeScalarSels
-	Int8LeNullableScalarSels = int8LeNullableScalarSels
-	Int16Le = int16Le
-	Int16LeNullable = int16LeNullable
-	Int16LeSels = int16LeSels
-	Int16LeNullableSels = int16LeNullableSels
-	Int16LeScalar = int16LeScalar
-	Int16LeNullableScalar = int16LeNullableScalar
-	Int16LeScalarSels = int16LeScalarSels
-	Int16LeNullableScalarSels = int16LeNullableScalarSels
-	Int32Le = int32Le
-	Int32LeNullable = int32LeNullable
-	Int32LeSels = int32LeSels
-	Int32LeNullableSels = int32LeNullableSels
-	Int32LeScalar = int32LeScalar
-	Int32LeNullableScalar = int32LeNullableScalar
-	Int32LeScalarSels = int32LeScalarSels
-	Int32LeNullableScalarSels = int32LeNullableScalarSels
-	Int64Le = int64Le
-	Int64LeNullable = int64LeNullable
-	Int64LeSels = int64LeSels
-	Int64LeNullableSels = int64LeNullableSels
-	Int64LeScalar = int64LeScalar
-	Int64LeNullableScalar = int64LeNullableScalar
-	Int64LeScalarSels = int64LeScalarSels
-	Int64LeNullableScalarSels = int64LeNullableScalarSels
-	Uint8Le = uint8Le
-	Uint8LeNullable = uint8LeNullable
-	Uint8LeSels = uint8LeSels
-	Uint8LeNullableSels = uint8LeNullableSels
-	Uint8LeScalar = uint8LeScalar
-	Uint8LeNullableScalar = uint8LeNullableScalar
-	Uint8LeScalarSels = uint8LeScalarSels
-	Uint8LeNullableScalarSels = uint8LeNullableScalarSels
-	Uint16Le = uint16Le
-	Uint16LeNullable = uint16LeNullable
-	Uint16LeSels = uint16LeSels
-	Uint16LeNullableSels = uint16LeNullableSels
-	Uint16LeScalar = uint16LeScalar
-	Uint16LeNullableScalar = uint16LeNullableScalar
-	Uint16LeScalarSels = uint16LeScalarSels
-	Uint16LeNullableScalarSels = uint16LeNullableScalarSels
-	Uint32Le = uint32Le
-	Uint32LeNullable = uint32LeNullable
-	Uint32LeSels = uint32LeSels
-	Uint32LeNullableSels = uint32LeNullableSels
-	Uint32LeScalar = uint32LeScalar
-	Uint32LeNullableScalar = uint32LeNullableScalar
-	Uint32LeScalarSels = uint32LeScalarSels
-	Uint32LeNullableScalarSels = uint32LeNullableScalarSels
-	Uint64Le = uint64Le
-	Uint64LeNullable = uint64LeNullable
-	Uint64LeSels = uint64LeSels
-	Uint64LeNullableSels = uint64LeNullableSels
-	Uint64LeScalar = uint64LeScalar
-	Uint64LeNullableScalar = uint64LeNullableScalar
-	Uint64LeScalarSels = uint64LeScalarSels
-	Uint64LeNullableScalarSels = uint64LeNullableScalarSels
-	Float32Le = float32Le
-	Float32LeNullable = float32LeNullable
-	Float32LeSels = float32LeSels
-	Float32LeNullableSels = float32LeNullableSels
-	Float32LeScalar = float32LeScalar
-	Float32LeNullableScalar = float32LeNullableScalar
-	Float32LeScalarSels = float32LeScalarSels
-	Float32LeNullableScalarSels = float32LeNullableScalarSels
-	Float64Le = float64Le
-	Float64LeNullable = float64LeNullable
-	Float64LeSels = float64LeSels
-	Float64LeNullableSels = float64LeNullableSels
-	Float64LeScalar = float64LeScalar
-	Float64LeNullableScalar = float64LeNullableScalar
-	Float64LeScalarSels = float64LeScalarSels
-	Float64LeNullableScalarSels = float64LeNullableScalarSels
-	StrLe = strLe
-	StrLeNullable = strLeNullable
-	StrLeSels = strLeSels
-	StrLeNullableSels = strLeNullableSels
-	StrLeScalar = strLeScalar
-	StrLeNullableScalar = strLeNullableScalar
-	StrLeScalarSels = strLeScalarSels
+	Int8Le                      = numericLe[int8]
+	Int8LeNullable              = numericLeNullable[int8]
+	Int8LeSels                  = numericLeSels[int8]
+	Int8LeNullableSels          = numericLeNullableSels[int8]
+	Int8LeScalar                = numericLeScalar[int8]
+	Int8LeNullableScalar        = numericLeNullableScalar[int8]
+	Int8LeScalarSels            = numericLeScalarSels[int8]
+	Int8LeNullableScalarSels    = numericLeNullableScalarSels[int8]
+	Int16Le                     = numericLe[int16]
+	Int16LeNullable             = numericLeNullable[int16]
+	Int16LeSels                 = numericLeSels[int16]
+	Int16LeNullableSels         = numericLeNullableSels[int16]
+	Int16LeScalar               = numericLeScalar[int16]
+	Int16LeNullableScalar       = numericLeNullableScalar[int16]
+	Int16LeScalarSels           = numericLeScalarSels[int16]
+	Int16LeNullableScalarSels   = numericLeNullableScalarSels[int16]
+	Int32Le                     = numericLe[int32]
+	Int32LeNullable             = numericLeNullable[int32]
+	Int32LeSels                 = numericLeSels[int32]
+	Int32LeNullableSels         = numericLeNullableSels[int32]
+	Int32LeScalar               = numericLeScalar[int32]
+	Int32LeNullableScalar       = numericLeNullableScalar[int32]
+	Int32LeScalarSels           = numericLeScalarSels[int32]
+	Int32LeNullableScalarSels   = numericLeNullableScalarSels[int32]
+	Int64Le                     = numericLe[int64]
+	Int64LeNullable             = numericLeNullable[int64]
+	Int64LeSels                 = numericLeSels[int64]
+	Int64LeNullableSels         = numericLeNullableSels[int64]
+	Int64LeScalar               = numericLeScalar[int64]
+	Int64LeNullableScalar       = numericLeNullableScalar[int64]
+	Int64LeScalarSels           = numericLeScalarSels[int64]
+	Int64LeNullableScalarSels   = numericLeNullableScalarSels[int64]
+	Uint8Le                     = numericLe[uint8]
+	Uint8LeNullable             = numericLeNullable[uint8]
+	Uint8LeSels                 = numericLeSels[uint8]
+	Uint8LeNullableSels         = numericLeNullableSels[uint8]
+	Uint8LeScalar               = numericLeScalar[uint8]
+	Uint8LeNullableScalar       = numericLeNullableScalar[uint8]
+	Uint8LeScalarSels           = numericLeScalarSels[uint8]
+	Uint8LeNullableScalarSels   = numericLeNullableScalarSels[uint8]
+	Uint16Le                    = numericLe[uint16]
+	Uint16LeNullable            = numericLeNullable[uint16]
+	Uint16LeSels                = numericLeSels[uint16]
+	Uint16LeNullableSels        = numericLeNullableSels[uint16]
+	Uint16LeScalar              = numericLeScalar[uint16]
+	Uint16LeNullableScalar      = numericLeNullableScalar[uint16]
+	Uint16LeScalarSels          = numericLeScalarSels[uint16]
+	Uint16LeNullableScalarSels  = numericLeNullableScalarSels[uint16]
+	Uint32Le                    = numericLe[uint32]
+	Uint32LeNullable            = numericLeNullable[uint32]
+	Uint32LeSels                = numericLeSels[uint32]
+	Uint32LeNullableSels        = numericLeNullableSels[uint32]
+	Uint32LeScalar              = numericLeScalar[uint32]
+	Uint32LeNullableScalar      = numericLeNullableScalar[uint32]
+	Uint32LeScalarSels          = numericLeScalarSels[uint32]
+	Uint32LeNullableScalarSels  = numericLeNullableScalarSels[uint32]
+	Uint64Le                    = numericLe[uint64]
+	Uint64LeNullable            = numericLeNullable[uint64]
+	Uint64LeSels                = numericLeSels[uint64]
+	Uint64LeNullableSels        = numericLeNullableSels[uint64]
+	Uint64LeScalar              = numericLeScalar[uint64]
+	Uint64LeNullableScalar      = numericLeNullableScalar[uint64]
+	Uint64LeScalarSels          = numericLeScalarSels[uint64]
+	Uint64LeNullableScalarSels  = numericLeNullableScalarSels[uint64]
+	Float32Le                   = numericLe[float32]
+	Float32LeNullable           = numericLeNullable[float32]
+	Float32LeSels               = numericLeSels[float32]
+	Float32LeNullableSels       = numericLeNullableSels[float32]
+	Float32LeScalar             = numericLeScalar[float32]
+	Float32LeNullableScalar     = numericLeNullableScalar[float32]
+	Float32LeScalarSels         = numericLeScalarSels[float32]
+	Float32LeNullableScalarSels = numericLeNullableScalarSels[float32]
+	Float64Le                   = numericLe[float64]
+	Float64LeNullable           = numericLeNullable[float64]
+	Float64LeSels               = numericLeSels[float64]
+	Float64LeNullableSels       = numericLeNullableSels[float64]
+	Float64LeScalar             = numericLeScalar[float64]
+	Float64LeNullableScalar     = numericLeNullableScalar[float64]
+	Float64LeScalarSels         = numericLeScalarSels[float64]
+	Float64LeNullableScalarSels = numericLeNullableScalarSels[float64]
+
+	StrLe                   = strLe
+	StrLeNullable           = strLeNullable
+	StrLeSels               = strLeSels
+	StrLeNullableSels       = strLeNullableSels
+	StrLeScalar             = strLeScalar
+	StrLeNullableScalar     = strLeNullableScalar
+	StrLeScalarSels         = strLeScalarSels
 	StrLeNullableScalarSels = strLeNullableScalarSels
-	Decimal64Le = decimal64Le
-	Decimal64LeNullable = decimal64LeNullable
-	Decimal64LeSels = decimal64LeSels
-	Decimal64LeNullableSels = decimal64LeNullableSels
-	Decimal64LeScalar = decimal64LeScalar
-	Decimal64LeNullableScalar = decimal64LeNullableScalar
-	Decimal64LeScalarSels = decimal64LeScalarSels
-	Decimal64LeNullableScalarSels = decimal64LeNullableScalarSels
-	Decimal128Le = decimal128Le
-	Decimal128LeNullable = decimal128LeNullable
-	Decimal128LeSels = decimal128LeSels
-	Decimal128LeNullableSels = decimal128LeNullableSels
-	Decimal128LeScalar = decimal128LeScalar
-	Decimal128LeNullableScalar = decimal128LeNullableScalar
-	Decimal128LeScalarSels = decimal128LeScalarSels
-	Decimal128LeNullableScalarSels = decimal128LeNullableScalarSels
-}
-
-func int8Le(xs, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LeNullable(xs, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LeSels(xs, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LeNullableSels(xs, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LeScalar(x int8, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LeNullableScalar(x int8, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LeScalarSels(x int8, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LeNullableScalarSels(x int8, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16Le(xs, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
 
-func int16LeNullable(xs, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LeSels(xs, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LeNullableSels(xs, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LeScalar(x int16, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LeNullableScalar(x int16, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LeScalarSels(x int16, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LeNullableScalarSels(x int16, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32Le(xs, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LeNullable(xs, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LeSels(xs, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LeNullableSels(xs, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LeScalar(x int32, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LeNullableScalar(x int32, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LeScalarSels(x int32, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LeNullableScalarSels(x int32, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64Le(xs, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LeNullable(xs, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LeSels(xs, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LeNullableSels(xs, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LeScalar(x int64, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LeNullableScalar(x int64, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LeScalarSels(x int64, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LeNullableScalarSels(x int64, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8Le(xs, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LeNullable(xs, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LeSels(xs, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LeNullableSels(xs, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LeScalar(x uint8, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LeNullableScalar(x uint8, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LeScalarSels(x uint8, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LeNullableScalarSels(x uint8, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16Le(xs, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LeNullable(xs, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LeSels(xs, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LeNullableSels(xs, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LeScalar(x uint16, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LeNullableScalar(x uint16, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LeScalarSels(x uint16, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LeNullableScalarSels(x uint16, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32Le(xs, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LeNullable(xs, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LeSels(xs, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LeNullableSels(xs, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LeScalar(x uint32, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LeNullableScalar(x uint32, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LeScalarSels(x uint32, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LeNullableScalarSels(x uint32, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64Le(xs, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LeNullable(xs, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LeSels(xs, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LeNullableSels(xs, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LeScalar(x uint64, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LeNullableScalar(x uint64, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LeScalarSels(x uint64, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LeNullableScalarSels(x uint64, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32Le(xs, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LeNullable(xs, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LeSels(xs, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LeNullableSels(xs, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LeScalar(x float32, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LeNullableScalar(x float32, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x <= y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LeScalarSels(x float32, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LeNullableScalarSels(x float32, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
+	Decimal64Le                    = decimal64Le
+	Decimal64LeNullable            = decimal64LeNullable
+	Decimal64LeSels                = decimal64LeSels
+	Decimal64LeNullableSels        = decimal64LeNullableSels
+	Decimal64LeScalar              = decimal64LeScalar
+	Decimal64LeNullableScalar      = decimal64LeNullableScalar
+	Decimal64LeScalarSels          = decimal64LeScalarSels
+	Decimal64LeNullableScalarSels  = decimal64LeNullableScalarSels
+	Decimal128Le                   = decimal128Le
+	Decimal128LeNullable           = decimal128LeNullable
+	Decimal128LeSels               = decimal128LeSels
+	Decimal128LeNullableSels       = decimal128LeNullableSels
+	Decimal128LeScalar             = decimal128LeScalar
+	Decimal128LeNullableScalar     = decimal128LeNullableScalar
+	Decimal128LeScalarSels         = decimal128LeScalarSels
+	Decimal128LeNullableScalarSels = decimal128LeNullableScalarSels
+)
 
-func float64Le(xs, ys []float64, rs []int64) []int64 {
+func numericLe[T constraints.Integer | constraints.Float](xs, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, x := range xs {
 		if x <= ys[i] {
@@ -1310,7 +144,7 @@ func float64Le(xs, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LeNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericLeNullable[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1336,7 +170,7 @@ func float64LeNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []in
 	return rs[:rsi]
 }
 
-func float64LeSels(xs, ys []float64, rs, sels []int64) []int64 {
+func numericLeSels[T constraints.Integer | constraints.Float](xs, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if xs[sel] <= ys[sel] {
@@ -1347,7 +181,7 @@ func float64LeSels(xs, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LeNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericLeNullableSels[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && xs[sel] <= ys[sel] {
@@ -1358,7 +192,7 @@ func float64LeNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []i
 	return rs[:rsi]
 }
 
-func float64LeScalar(x float64, ys []float64, rs []int64) []int64 {
+func numericLeScalar[T constraints.Integer | constraints.Float](x T, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, y := range ys {
 		if x <= y {
@@ -1369,7 +203,7 @@ func float64LeScalar(x float64, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LeNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericLeNullableScalar[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1395,7 +229,7 @@ func float64LeNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs
 	return rs[:rsi]
 }
 
-func float64LeScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
+func numericLeScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if x <= ys[sel] {
@@ -1406,7 +240,7 @@ func float64LeScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LeNullableScalarSels(x float64, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericLeNullableScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && x <= ys[sel] {
diff --git a/pkg/vectorize/length/length.go b/pkg/vectorize/length/length.go
index 864a5d3758ccbd29bb7a64dffc039510013def11..5b1798f6ca35854d0956756dba79e729c3ee7a24 100644
--- a/pkg/vectorize/length/length.go
+++ b/pkg/vectorize/length/length.go
@@ -22,6 +22,10 @@ var (
 	StrLength func(*types.Bytes, []int64) []int64
 )
 
+func init() {
+	StrLength = strLength
+}
+
 func strLength(xs *types.Bytes, rs []int64) []int64 {
 	for i, n := range xs.Lengths {
 		rs[i] = int64(n)
diff --git a/pkg/vectorize/length/length_amd64.go b/pkg/vectorize/length/length_amd64.go
index 4298c2ac8afe359a68fcab88448e1fee12d51f61..2a3168c10a214f923d9a3ed80b913e0e15e7c7c7 100644
--- a/pkg/vectorize/length/length_amd64.go
+++ b/pkg/vectorize/length/length_amd64.go
@@ -28,8 +28,6 @@ func init() {
 		StrLength = strLengthAvx512
 	} else if cpu.X86.HasAVX2 {
 		StrLength = strLengthAvx2
-	} else {
-		StrLength = strLength
 	}
 }
 
diff --git a/pkg/vectorize/length/length_arm64.go b/pkg/vectorize/length/length_arm64.go
deleted file mode 100644
index 27982d15ad21ab9605e4b308c52d90e754ee2ae9..0000000000000000000000000000000000000000
--- a/pkg/vectorize/length/length_arm64.go
+++ /dev/null
@@ -1,19 +0,0 @@
-// Copyright 2021 Matrix Origin
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package length
-
-func init() {
-	StrLength = strLength
-}
diff --git a/pkg/vectorize/length/avx2.s b/pkg/vectorize/length/length_avx2_amd64.s
similarity index 99%
rename from pkg/vectorize/length/avx2.s
rename to pkg/vectorize/length/length_avx2_amd64.s
index 164258fd2d149a6d053bd3a178260c25d204f552..2e58c7e820d30b87813b21572a487f2153181b95 100644
--- a/pkg/vectorize/length/avx2.s
+++ b/pkg/vectorize/length/length_avx2_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/length/avx512.s b/pkg/vectorize/length/length_avx512_amd64.s
similarity index 99%
rename from pkg/vectorize/length/avx512.s
rename to pkg/vectorize/length/length_avx512_amd64.s
index 814f313b3a96a8d71b15dd21eeffb10517ed3229..16d2e308c9faf3fcfdea0ac7b4c477286ba52cee 100644
--- a/pkg/vectorize/length/avx512.s
+++ b/pkg/vectorize/length/length_avx512_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/lt/lt.go b/pkg/vectorize/lt/lt.go
index 88cb533762c0e3167b0d82269096539023dde327..57b20d44d82596ffe7ead8ab4d9b34bba863e38d 100644
--- a/pkg/vectorize/lt/lt.go
+++ b/pkg/vectorize/lt/lt.go
@@ -16,1290 +16,124 @@ package lt
 
 import (
 	"bytes"
-	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"math"
 
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
+
 	roaring "github.com/RoaringBitmap/roaring/roaring64"
 )
 
 var (
-	Int8Lt                      func([]int8, []int8, []int64) []int64
-	Int8LtNullable              func([]int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8LtSels                  func([]int8, []int8, []int64, []int64) []int64
-	Int8LtNullableSels          func([]int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int8LtScalar                func(int8, []int8, []int64) []int64
-	Int8LtNullableScalar        func(int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8LtScalarSels            func(int8, []int8, []int64, []int64) []int64
-	Int8LtNullableScalarSels    func(int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int16Lt                     func([]int16, []int16, []int64) []int64
-	Int16LtNullable             func([]int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16LtSels                 func([]int16, []int16, []int64, []int64) []int64
-	Int16LtNullableSels         func([]int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int16LtScalar               func(int16, []int16, []int64) []int64
-	Int16LtNullableScalar       func(int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16LtScalarSels           func(int16, []int16, []int64, []int64) []int64
-	Int16LtNullableScalarSels   func(int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int32Lt                     func([]int32, []int32, []int64) []int64
-	Int32LtNullable             func([]int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32LtSels                 func([]int32, []int32, []int64, []int64) []int64
-	Int32LtNullableSels         func([]int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int32LtScalar               func(int32, []int32, []int64) []int64
-	Int32LtNullableScalar       func(int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32LtScalarSels           func(int32, []int32, []int64, []int64) []int64
-	Int32LtNullableScalarSels   func(int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int64Lt                     func([]int64, []int64, []int64) []int64
-	Int64LtNullable             func([]int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64LtSels                 func([]int64, []int64, []int64, []int64) []int64
-	Int64LtNullableSels         func([]int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Int64LtScalar               func(int64, []int64, []int64) []int64
-	Int64LtNullableScalar       func(int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64LtScalarSels           func(int64, []int64, []int64, []int64) []int64
-	Int64LtNullableScalarSels   func(int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8Lt                     func([]uint8, []uint8, []int64) []int64
-	Uint8LtNullable             func([]uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8LtSels                 func([]uint8, []uint8, []int64, []int64) []int64
-	Uint8LtNullableSels         func([]uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8LtScalar               func(uint8, []uint8, []int64) []int64
-	Uint8LtNullableScalar       func(uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8LtScalarSels           func(uint8, []uint8, []int64, []int64) []int64
-	Uint8LtNullableScalarSels   func(uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16Lt                    func([]uint16, []uint16, []int64) []int64
-	Uint16LtNullable            func([]uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16LtSels                func([]uint16, []uint16, []int64, []int64) []int64
-	Uint16LtNullableSels        func([]uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16LtScalar              func(uint16, []uint16, []int64) []int64
-	Uint16LtNullableScalar      func(uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16LtScalarSels          func(uint16, []uint16, []int64, []int64) []int64
-	Uint16LtNullableScalarSels  func(uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32Lt                    func([]uint32, []uint32, []int64) []int64
-	Uint32LtNullable            func([]uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32LtSels                func([]uint32, []uint32, []int64, []int64) []int64
-	Uint32LtNullableSels        func([]uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32LtScalar              func(uint32, []uint32, []int64) []int64
-	Uint32LtNullableScalar      func(uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32LtScalarSels          func(uint32, []uint32, []int64, []int64) []int64
-	Uint32LtNullableScalarSels  func(uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64Lt                    func([]uint64, []uint64, []int64) []int64
-	Uint64LtNullable            func([]uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64LtSels                func([]uint64, []uint64, []int64, []int64) []int64
-	Uint64LtNullableSels        func([]uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64LtScalar              func(uint64, []uint64, []int64) []int64
-	Uint64LtNullableScalar      func(uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64LtScalarSels          func(uint64, []uint64, []int64, []int64) []int64
-	Uint64LtNullableScalarSels  func(uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Float32Lt                   func([]float32, []float32, []int64) []int64
-	Float32LtNullable           func([]float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32LtSels               func([]float32, []float32, []int64, []int64) []int64
-	Float32LtNullableSels       func([]float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float32LtScalar             func(float32, []float32, []int64) []int64
-	Float32LtNullableScalar     func(float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32LtScalarSels         func(float32, []float32, []int64, []int64) []int64
-	Float32LtNullableScalarSels func(float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float64Lt                   func([]float64, []float64, []int64) []int64
-	Float64LtNullable           func([]float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64LtSels               func([]float64, []float64, []int64, []int64) []int64
-	Float64LtNullableSels       func([]float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	Float64LtScalar             func(float64, []float64, []int64) []int64
-	Float64LtNullableScalar     func(float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64LtScalarSels         func(float64, []float64, []int64, []int64) []int64
-	Float64LtNullableScalarSels func(float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	StrLt                       func(*types.Bytes, *types.Bytes, []int64) []int64
-	StrLtNullable               func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrLtSels                   func(*types.Bytes, *types.Bytes, []int64, []int64) []int64
-	StrLtNullableSels           func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-	StrLtScalar                 func([]byte, *types.Bytes, []int64) []int64
-	StrLtNullableScalar         func([]byte, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrLtScalarSels             func([]byte, *types.Bytes, []int64, []int64) []int64
-	StrLtNullableScalarSels     func([]byte, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-
-	Decimal64Lt                    func([]types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64LtNullable            func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64LtSels                func([]types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64LtNullableSels        func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal64LtScalar              func(types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64LtNullableScalar      func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64LtScalarSels          func(types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64LtNullableScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128Lt                   func([]types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128LtNullable           func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128LtSels               func([]types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128LtNullableSels       func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128LtScalar             func(types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128LtNullableScalar     func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128LtScalarSels         func(types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128LtNullableScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-)
-
-func init() {
-	Int8Lt = int8Lt
-	Int8LtNullable = int8LtNullable
-	Int8LtSels = int8LtSels
-	Int8LtNullableSels = int8LtNullableSels
-	Int8LtScalar = int8LtScalar
-	Int8LtNullableScalar = int8LtNullableScalar
-	Int8LtScalarSels = int8LtScalarSels
-	Int8LtNullableScalarSels = int8LtNullableScalarSels
-	Int16Lt = int16Lt
-	Int16LtNullable = int16LtNullable
-	Int16LtSels = int16LtSels
-	Int16LtNullableSels = int16LtNullableSels
-	Int16LtScalar = int16LtScalar
-	Int16LtNullableScalar = int16LtNullableScalar
-	Int16LtScalarSels = int16LtScalarSels
-	Int16LtNullableScalarSels = int16LtNullableScalarSels
-	Int32Lt = int32Lt
-	Int32LtNullable = int32LtNullable
-	Int32LtSels = int32LtSels
-	Int32LtNullableSels = int32LtNullableSels
-	Int32LtScalar = int32LtScalar
-	Int32LtNullableScalar = int32LtNullableScalar
-	Int32LtScalarSels = int32LtScalarSels
-	Int32LtNullableScalarSels = int32LtNullableScalarSels
-	Int64Lt = int64Lt
-	Int64LtNullable = int64LtNullable
-	Int64LtSels = int64LtSels
-	Int64LtNullableSels = int64LtNullableSels
-	Int64LtScalar = int64LtScalar
-	Int64LtNullableScalar = int64LtNullableScalar
-	Int64LtScalarSels = int64LtScalarSels
-	Int64LtNullableScalarSels = int64LtNullableScalarSels
-	Uint8Lt = uint8Lt
-	Uint8LtNullable = uint8LtNullable
-	Uint8LtSels = uint8LtSels
-	Uint8LtNullableSels = uint8LtNullableSels
-	Uint8LtScalar = uint8LtScalar
-	Uint8LtNullableScalar = uint8LtNullableScalar
-	Uint8LtScalarSels = uint8LtScalarSels
-	Uint8LtNullableScalarSels = uint8LtNullableScalarSels
-	Uint16Lt = uint16Lt
-	Uint16LtNullable = uint16LtNullable
-	Uint16LtSels = uint16LtSels
-	Uint16LtNullableSels = uint16LtNullableSels
-	Uint16LtScalar = uint16LtScalar
-	Uint16LtNullableScalar = uint16LtNullableScalar
-	Uint16LtScalarSels = uint16LtScalarSels
-	Uint16LtNullableScalarSels = uint16LtNullableScalarSels
-	Uint32Lt = uint32Lt
-	Uint32LtNullable = uint32LtNullable
-	Uint32LtSels = uint32LtSels
-	Uint32LtNullableSels = uint32LtNullableSels
-	Uint32LtScalar = uint32LtScalar
-	Uint32LtNullableScalar = uint32LtNullableScalar
-	Uint32LtScalarSels = uint32LtScalarSels
-	Uint32LtNullableScalarSels = uint32LtNullableScalarSels
-	Uint64Lt = uint64Lt
-	Uint64LtNullable = uint64LtNullable
-	Uint64LtSels = uint64LtSels
-	Uint64LtNullableSels = uint64LtNullableSels
-	Uint64LtScalar = uint64LtScalar
-	Uint64LtNullableScalar = uint64LtNullableScalar
-	Uint64LtScalarSels = uint64LtScalarSels
-	Uint64LtNullableScalarSels = uint64LtNullableScalarSels
-	Float32Lt = float32Lt
-	Float32LtNullable = float32LtNullable
-	Float32LtSels = float32LtSels
-	Float32LtNullableSels = float32LtNullableSels
-	Float32LtScalar = float32LtScalar
-	Float32LtNullableScalar = float32LtNullableScalar
-	Float32LtScalarSels = float32LtScalarSels
-	Float32LtNullableScalarSels = float32LtNullableScalarSels
-	Float64Lt = float64Lt
-	Float64LtNullable = float64LtNullable
-	Float64LtSels = float64LtSels
-	Float64LtNullableSels = float64LtNullableSels
-	Float64LtScalar = float64LtScalar
-	Float64LtNullableScalar = float64LtNullableScalar
-	Float64LtScalarSels = float64LtScalarSels
-	Float64LtNullableScalarSels = float64LtNullableScalarSels
-	StrLt = strLt
-	StrLtNullable = strLtNullable
-	StrLtSels = strLtSels
-	StrLtNullableSels = strLtNullableSels
-	StrLtScalar = strLtScalar
-	StrLtNullableScalar = strLtNullableScalar
-	StrLtScalarSels = strLtScalarSels
+	Int8Lt                      = numericLt[int8]
+	Int8LtNullable              = numericLtNullable[int8]
+	Int8LtSels                  = numericLtSels[int8]
+	Int8LtNullableSels          = numericLtNullableSels[int8]
+	Int8LtScalar                = numericLtScalar[int8]
+	Int8LtNullableScalar        = numericLtNullableScalar[int8]
+	Int8LtScalarSels            = numericLtScalarSels[int8]
+	Int8LtNullableScalarSels    = numericLtNullableScalarSels[int8]
+	Int16Lt                     = numericLt[int16]
+	Int16LtNullable             = numericLtNullable[int16]
+	Int16LtSels                 = numericLtSels[int16]
+	Int16LtNullableSels         = numericLtNullableSels[int16]
+	Int16LtScalar               = numericLtScalar[int16]
+	Int16LtNullableScalar       = numericLtNullableScalar[int16]
+	Int16LtScalarSels           = numericLtScalarSels[int16]
+	Int16LtNullableScalarSels   = numericLtNullableScalarSels[int16]
+	Int32Lt                     = numericLt[int32]
+	Int32LtNullable             = numericLtNullable[int32]
+	Int32LtSels                 = numericLtSels[int32]
+	Int32LtNullableSels         = numericLtNullableSels[int32]
+	Int32LtScalar               = numericLtScalar[int32]
+	Int32LtNullableScalar       = numericLtNullableScalar[int32]
+	Int32LtScalarSels           = numericLtScalarSels[int32]
+	Int32LtNullableScalarSels   = numericLtNullableScalarSels[int32]
+	Int64Lt                     = numericLt[int64]
+	Int64LtNullable             = numericLtNullable[int64]
+	Int64LtSels                 = numericLtSels[int64]
+	Int64LtNullableSels         = numericLtNullableSels[int64]
+	Int64LtScalar               = numericLtScalar[int64]
+	Int64LtNullableScalar       = numericLtNullableScalar[int64]
+	Int64LtScalarSels           = numericLtScalarSels[int64]
+	Int64LtNullableScalarSels   = numericLtNullableScalarSels[int64]
+	Uint8Lt                     = numericLt[uint8]
+	Uint8LtNullable             = numericLtNullable[uint8]
+	Uint8LtSels                 = numericLtSels[uint8]
+	Uint8LtNullableSels         = numericLtNullableSels[uint8]
+	Uint8LtScalar               = numericLtScalar[uint8]
+	Uint8LtNullableScalar       = numericLtNullableScalar[uint8]
+	Uint8LtScalarSels           = numericLtScalarSels[uint8]
+	Uint8LtNullableScalarSels   = numericLtNullableScalarSels[uint8]
+	Uint16Lt                    = numericLt[uint16]
+	Uint16LtNullable            = numericLtNullable[uint16]
+	Uint16LtSels                = numericLtSels[uint16]
+	Uint16LtNullableSels        = numericLtNullableSels[uint16]
+	Uint16LtScalar              = numericLtScalar[uint16]
+	Uint16LtNullableScalar      = numericLtNullableScalar[uint16]
+	Uint16LtScalarSels          = numericLtScalarSels[uint16]
+	Uint16LtNullableScalarSels  = numericLtNullableScalarSels[uint16]
+	Uint32Lt                    = numericLt[uint32]
+	Uint32LtNullable            = numericLtNullable[uint32]
+	Uint32LtSels                = numericLtSels[uint32]
+	Uint32LtNullableSels        = numericLtNullableSels[uint32]
+	Uint32LtScalar              = numericLtScalar[uint32]
+	Uint32LtNullableScalar      = numericLtNullableScalar[uint32]
+	Uint32LtScalarSels          = numericLtScalarSels[uint32]
+	Uint32LtNullableScalarSels  = numericLtNullableScalarSels[uint32]
+	Uint64Lt                    = numericLt[uint64]
+	Uint64LtNullable            = numericLtNullable[uint64]
+	Uint64LtSels                = numericLtSels[uint64]
+	Uint64LtNullableSels        = numericLtNullableSels[uint64]
+	Uint64LtScalar              = numericLtScalar[uint64]
+	Uint64LtNullableScalar      = numericLtNullableScalar[uint64]
+	Uint64LtScalarSels          = numericLtScalarSels[uint64]
+	Uint64LtNullableScalarSels  = numericLtNullableScalarSels[uint64]
+	Float32Lt                   = numericLt[float32]
+	Float32LtNullable           = numericLtNullable[float32]
+	Float32LtSels               = numericLtSels[float32]
+	Float32LtNullableSels       = numericLtNullableSels[float32]
+	Float32LtScalar             = numericLtScalar[float32]
+	Float32LtNullableScalar     = numericLtNullableScalar[float32]
+	Float32LtScalarSels         = numericLtScalarSels[float32]
+	Float32LtNullableScalarSels = numericLtNullableScalarSels[float32]
+	Float64Lt                   = numericLt[float64]
+	Float64LtNullable           = numericLtNullable[float64]
+	Float64LtSels               = numericLtSels[float64]
+	Float64LtNullableSels       = numericLtNullableSels[float64]
+	Float64LtScalar             = numericLtScalar[float64]
+	Float64LtNullableScalar     = numericLtNullableScalar[float64]
+	Float64LtScalarSels         = numericLtScalarSels[float64]
+	Float64LtNullableScalarSels = numericLtNullableScalarSels[float64]
+
+	StrLt                   = strLt
+	StrLtNullable           = strLtNullable
+	StrLtSels               = strLtSels
+	StrLtNullableSels       = strLtNullableSels
+	StrLtScalar             = strLtScalar
+	StrLtNullableScalar     = strLtNullableScalar
+	StrLtScalarSels         = strLtScalarSels
 	StrLtNullableScalarSels = strLtNullableScalarSels
-	Decimal64Lt = decimal64Lt
-	Decimal64LtNullable = decimal64LtNullable
-	Decimal64LtSels = decimal64LtSels
-	Decimal64LtNullableSels = decimal64LtNullableSels
-	Decimal64LtScalar = decimal64LtScalar
-	Decimal64LtNullableScalar = decimal64LtNullableScalar
-	Decimal64LtScalarSels = decimal64LtScalarSels
-	Decimal64LtNullableScalarSels = decimal64LtNullableScalarSels
-	Decimal128Lt = decimal128Lt
-	Decimal128LtNullable = decimal128LtNullable
-	Decimal128LtSels = decimal128LtSels
-	Decimal128LtNullableSels = decimal128LtNullableSels
-	Decimal128LtScalar = decimal128LtScalar
-	Decimal128LtNullableScalar = decimal128LtNullableScalar
-	Decimal128LtScalarSels = decimal128LtScalarSels
-	Decimal128LtNullableScalarSels = decimal128LtNullableScalarSels
-}
-
-func int8Lt(xs, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LtNullable(xs, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LtSels(xs, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LtNullableSels(xs, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LtScalar(x int8, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LtNullableScalar(x int8, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LtScalarSels(x int8, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8LtNullableScalarSels(x int8, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16Lt(xs, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
 
-func int16LtNullable(xs, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LtSels(xs, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LtNullableSels(xs, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LtScalar(x int16, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LtNullableScalar(x int16, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LtScalarSels(x int16, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16LtNullableScalarSels(x int16, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32Lt(xs, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LtNullable(xs, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LtSels(xs, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LtNullableSels(xs, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LtScalar(x int32, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LtNullableScalar(x int32, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LtScalarSels(x int32, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32LtNullableScalarSels(x int32, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64Lt(xs, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LtNullable(xs, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LtSels(xs, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LtNullableSels(xs, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LtScalar(x int64, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LtNullableScalar(x int64, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LtScalarSels(x int64, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64LtNullableScalarSels(x int64, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8Lt(xs, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LtNullable(xs, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LtSels(xs, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LtNullableSels(xs, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LtScalar(x uint8, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LtNullableScalar(x uint8, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LtScalarSels(x uint8, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8LtNullableScalarSels(x uint8, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16Lt(xs, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LtNullable(xs, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LtSels(xs, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LtNullableSels(xs, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LtScalar(x uint16, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LtNullableScalar(x uint16, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LtScalarSels(x uint16, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16LtNullableScalarSels(x uint16, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32Lt(xs, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LtNullable(xs, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LtSels(xs, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LtNullableSels(xs, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LtScalar(x uint32, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LtNullableScalar(x uint32, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LtScalarSels(x uint32, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32LtNullableScalarSels(x uint32, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64Lt(xs, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LtNullable(xs, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LtSels(xs, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LtNullableSels(xs, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LtScalar(x uint64, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LtNullableScalar(x uint64, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LtScalarSels(x uint64, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64LtNullableScalarSels(x uint64, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32Lt(xs, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LtNullable(xs, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LtSels(xs, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LtNullableSels(xs, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LtScalar(x float32, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LtNullableScalar(x float32, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x < y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LtScalarSels(x float32, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32LtNullableScalarSels(x float32, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
+	Decimal64Lt                    = decimal64Lt
+	Decimal64LtNullable            = decimal64LtNullable
+	Decimal64LtSels                = decimal64LtSels
+	Decimal64LtNullableSels        = decimal64LtNullableSels
+	Decimal64LtScalar              = decimal64LtScalar
+	Decimal64LtNullableScalar      = decimal64LtNullableScalar
+	Decimal64LtScalarSels          = decimal64LtScalarSels
+	Decimal64LtNullableScalarSels  = decimal64LtNullableScalarSels
+	Decimal128Lt                   = decimal128Lt
+	Decimal128LtNullable           = decimal128LtNullable
+	Decimal128LtSels               = decimal128LtSels
+	Decimal128LtNullableSels       = decimal128LtNullableSels
+	Decimal128LtScalar             = decimal128LtScalar
+	Decimal128LtNullableScalar     = decimal128LtNullableScalar
+	Decimal128LtScalarSels         = decimal128LtScalarSels
+	Decimal128LtNullableScalarSels = decimal128LtNullableScalarSels
+)
 
-func float64Lt(xs, ys []float64, rs []int64) []int64 {
+func numericLt[T constraints.Integer | constraints.Float](xs, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, x := range xs {
 		if x < ys[i] {
@@ -1310,7 +144,7 @@ func float64Lt(xs, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LtNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericLtNullable[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1336,7 +170,7 @@ func float64LtNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []in
 	return rs[:rsi]
 }
 
-func float64LtSels(xs, ys []float64, rs, sels []int64) []int64 {
+func numericLtSels[T constraints.Integer | constraints.Float](xs, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if xs[sel] < ys[sel] {
@@ -1347,7 +181,7 @@ func float64LtSels(xs, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LtNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericLtNullableSels[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && xs[sel] < ys[sel] {
@@ -1358,7 +192,7 @@ func float64LtNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []i
 	return rs[:rsi]
 }
 
-func float64LtScalar(x float64, ys []float64, rs []int64) []int64 {
+func numericLtScalar[T constraints.Integer | constraints.Float](x T, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, y := range ys {
 		if x < y {
@@ -1369,7 +203,7 @@ func float64LtScalar(x float64, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LtNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericLtNullableScalar[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1395,7 +229,7 @@ func float64LtNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs
 	return rs[:rsi]
 }
 
-func float64LtScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
+func numericLtScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if x < ys[sel] {
@@ -1406,7 +240,7 @@ func float64LtScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64LtNullableScalarSels(x float64, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericLtNullableScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && x < ys[sel] {
diff --git a/pkg/vectorize/max/max.go b/pkg/vectorize/max/max.go
index 8278613cdaf13279a2d074d9dbbfbd6057ca5d4b..ad3fc9d121ff7c68bb6462bb0657a827aa8b3cdc 100644
--- a/pkg/vectorize/max/max.go
+++ b/pkg/vectorize/max/max.go
@@ -18,33 +18,36 @@ import (
 	"bytes"
 
 	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
 )
 
 var (
-	BoolMax        func([]bool) bool
-	BoolMaxSels    func([]bool, []int64) bool
-	Int8Max        func([]int8) int8
-	Int8MaxSels    func([]int8, []int64) int8
-	Int16Max       func([]int16) int16
-	Int16MaxSels   func([]int16, []int64) int16
-	Int32Max       func([]int32) int32
-	Int32MaxSels   func([]int32, []int64) int32
-	Int64Max       func([]int64) int64
-	Int64MaxSels   func([]int64, []int64) int64
-	Uint8Max       func([]uint8) uint8
-	Uint8MaxSels   func([]uint8, []int64) uint8
-	Uint16Max      func([]uint16) uint16
-	Uint16MaxSels  func([]uint16, []int64) uint16
-	Uint32Max      func([]uint32) uint32
-	Uint32MaxSels  func([]uint32, []int64) uint32
-	Uint64Max      func([]uint64) uint64
-	Uint64MaxSels  func([]uint64, []int64) uint64
-	Float32Max     func([]float32) float32
-	Float32MaxSels func([]float32, []int64) float32
-	Float64Max     func([]float64) float64
-	Float64MaxSels func([]float64, []int64) float64
-	StrMax         func(*types.Bytes) []byte
-	StrMaxSels     func(*types.Bytes, []int64) []byte
+	BoolMax     = boolMax
+	BoolMaxSels = boolMaxSels
+
+	Int8Max        = numericMax[int8]
+	Int16Max       = numericMax[int16]
+	Int32Max       = numericMax[int32]
+	Int64Max       = numericMax[int64]
+	Uint8Max       = numericMax[uint8]
+	Uint16Max      = numericMax[uint16]
+	Uint32Max      = numericMax[uint32]
+	Uint64Max      = numericMax[uint64]
+	Float32Max     = numericMax[float32]
+	Float64Max     = numericMax[float64]
+	Int8MaxSels    = numericMaxSels[int8]
+	Int16MaxSels   = numericMaxSels[int16]
+	Int32MaxSels   = numericMaxSels[int32]
+	Int64MaxSels   = numericMaxSels[int64]
+	Uint8MaxSels   = numericMaxSels[uint8]
+	Uint16MaxSels  = numericMaxSels[uint16]
+	Uint32MaxSels  = numericMaxSels[uint32]
+	Uint64MaxSels  = numericMaxSels[uint64]
+	Float32MaxSels = numericMaxSels[float32]
+	Float64MaxSels = numericMaxSels[float64]
+
+	StrMax     = strMax
+	StrMaxSels = strMaxSels
 )
 
 func boolMax(xs []bool) bool {
@@ -65,7 +68,7 @@ func boolMaxSels(xs []bool, sels []int64) bool {
 	return false
 }
 
-func int8Max(xs []int8) int8 {
+func numericMax[T constraints.Integer | constraints.Float](xs []T) T {
 	res := xs[0]
 	for _, x := range xs {
 		if x > res {
@@ -75,196 +78,7 @@ func int8Max(xs []int8) int8 {
 	return res
 }
 
-func int8MaxSels(xs []int8, sels []int64) int8 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int16Max(xs []int16) int16 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int16MaxSels(xs []int16, sels []int64) int16 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int32Max(xs []int32) int32 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int32MaxSels(xs []int32, sels []int64) int32 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int64Max(xs []int64) int64 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int64MaxSels(xs []int64, sels []int64) int64 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint8Max(xs []uint8) uint8 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint8MaxSels(xs []uint8, sels []int64) uint8 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint16Max(xs []uint16) uint16 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint16MaxSels(xs []uint16, sels []int64) uint16 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint32Max(xs []uint32) uint32 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint32MaxSels(xs []uint32, sels []int64) uint32 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint64Max(xs []uint64) uint64 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint64MaxSels(xs []uint64, sels []int64) uint64 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float32Max(xs []float32) float32 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float32MaxSels(xs []float32, sels []int64) float32 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float64Max(xs []float64) float64 {
-	res := xs[0]
-	for _, x := range xs {
-		if x > res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float64MaxSels(xs []float64, sels []int64) float64 {
+func numericMaxSels[T constraints.Integer | constraints.Float](xs []T, sels []int64) T {
 	res := xs[sels[0]]
 	for _, sel := range sels {
 		x := xs[sel]
diff --git a/pkg/vectorize/max/max_amd64.go b/pkg/vectorize/max/max_amd64.go
index 171cb2ae4cf72aaa0fd424d2f06950e37b1294c4..49c6efe4123472857a3c56f1205b842789752151 100644
--- a/pkg/vectorize/max/max_amd64.go
+++ b/pkg/vectorize/max/max_amd64.go
@@ -12,9 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:build amd64
-// +build amd64
-
 package max
 
 import (
@@ -56,41 +53,12 @@ func init() {
 		Int8Max = int8MaxAvx2
 		Int16Max = int16MaxAvx2
 		Int32Max = int32MaxAvx2
-		Int64Max = int64Max
 		Uint8Max = uint8MaxAvx2
 		Uint16Max = uint16MaxAvx2
 		Uint32Max = uint32MaxAvx2
-		Uint64Max = uint64Max
 		Float32Max = float32MaxAvx2
 		Float64Max = float64MaxAvx2
-	} else {
-		Int8Max = int8Max
-		Int16Max = int16Max
-		Int32Max = int32Max
-		Int64Max = int64Max
-		Uint8Max = uint8Max
-		Uint16Max = uint16Max
-		Uint32Max = uint32Max
-		Uint64Max = uint64Max
-		Float32Max = float32Max
-		Float64Max = float64Max
 	}
-
-	BoolMax = boolMax
-	StrMax = strMax
-
-	BoolMaxSels = boolMaxSels
-	Int8MaxSels = int8MaxSels
-	Int16MaxSels = int16MaxSels
-	Int32MaxSels = int32MaxSels
-	Int64MaxSels = int64MaxSels
-	Uint8MaxSels = uint8MaxSels
-	Uint16MaxSels = uint16MaxSels
-	Uint32MaxSels = uint32MaxSels
-	Uint64MaxSels = uint64MaxSels
-	Float32MaxSels = float32MaxSels
-	Float64MaxSels = float64MaxSels
-	StrMaxSels = strMaxSels
 }
 
 func int8MaxAvx2(xs []int8) int8 {
diff --git a/pkg/vectorize/max/max_arm64.go b/pkg/vectorize/max/max_arm64.go
deleted file mode 100644
index 3070238d5d0509371865126af8f19f13b1a76cbf..0000000000000000000000000000000000000000
--- a/pkg/vectorize/max/max_arm64.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2021 Matrix Origin
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build arm64
-// +build arm64
-
-package max
-
-func init() {
-	BoolMax = boolMax
-	Int8Max = int8Max
-	Int16Max = int16Max
-	Int32Max = int32Max
-	Int64Max = int64Max
-	Uint8Max = uint8Max
-	Uint16Max = uint16Max
-	Uint32Max = uint32Max
-	Uint64Max = uint64Max
-	Float32Max = float32Max
-	Float64Max = float64Max
-	StrMax = strMax
-
-	BoolMaxSels = boolMaxSels
-	Int8MaxSels = int8MaxSels
-	Int16MaxSels = int16MaxSels
-	Int32MaxSels = int32MaxSels
-	Int64MaxSels = int64MaxSels
-	Uint8MaxSels = uint8MaxSels
-	Uint16MaxSels = uint16MaxSels
-	Uint32MaxSels = uint32MaxSels
-	Uint64MaxSels = uint64MaxSels
-	Float32MaxSels = float32MaxSels
-	Float64MaxSels = float64MaxSels
-	StrMaxSels = strMaxSels
-}
diff --git a/pkg/vectorize/max/avx2.s b/pkg/vectorize/max/max_avx2_amd64.s
similarity index 99%
rename from pkg/vectorize/max/avx2.s
rename to pkg/vectorize/max/max_avx2_amd64.s
index 6bdecea914a43d98ce1f3fa1112d0d15fabe9dde..796c6844a08e55e50531229568d87a0e477decf4 100644
--- a/pkg/vectorize/max/avx2.s
+++ b/pkg/vectorize/max/max_avx2_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/max/avx512.s b/pkg/vectorize/max/max_avx512_amd64.s
similarity index 99%
rename from pkg/vectorize/max/avx512.s
rename to pkg/vectorize/max/max_avx512_amd64.s
index a9919bd0e6e54621480f22102e6476823ff86246..77902d6ec09dcdb68edeab545566b5015409ca52 100644
--- a/pkg/vectorize/max/avx512.s
+++ b/pkg/vectorize/max/max_avx512_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/min/min.go b/pkg/vectorize/min/min.go
index d7ab1f7cd18fc8f0afe9fde267bc760a77f55088..d849b4394ecf312eb0103411c6b3439ae80f0c18 100644
--- a/pkg/vectorize/min/min.go
+++ b/pkg/vectorize/min/min.go
@@ -18,33 +18,36 @@ import (
 	"bytes"
 
 	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
 )
 
 var (
-	BoolMin        func([]bool) bool
-	BoolMinSels    func([]bool, []int64) bool
-	Int8Min        func([]int8) int8
-	Int8MinSels    func([]int8, []int64) int8
-	Int16Min       func([]int16) int16
-	Int16MinSels   func([]int16, []int64) int16
-	Int32Min       func([]int32) int32
-	Int32MinSels   func([]int32, []int64) int32
-	Int64Min       func([]int64) int64
-	Int64MinSels   func([]int64, []int64) int64
-	Uint8Min       func([]uint8) uint8
-	Uint8MinSels   func([]uint8, []int64) uint8
-	Uint16Min      func([]uint16) uint16
-	Uint16MinSels  func([]uint16, []int64) uint16
-	Uint32Min      func([]uint32) uint32
-	Uint32MinSels  func([]uint32, []int64) uint32
-	Uint64Min      func([]uint64) uint64
-	Uint64MinSels  func([]uint64, []int64) uint64
-	Float32Min     func([]float32) float32
-	Float32MinSels func([]float32, []int64) float32
-	Float64Min     func([]float64) float64
-	Float64MinSels func([]float64, []int64) float64
-	StrMin         func(*types.Bytes) []byte
-	StrMinSels     func(*types.Bytes, []int64) []byte
+	BoolMin     = boolMin
+	BoolMinSels = boolMinSels
+
+	Int8Min        = numericMin[int8]
+	Int16Min       = numericMin[int16]
+	Int32Min       = numericMin[int32]
+	Int64Min       = numericMin[int64]
+	Uint8Min       = numericMin[uint8]
+	Uint16Min      = numericMin[uint16]
+	Uint32Min      = numericMin[uint32]
+	Uint64Min      = numericMin[uint64]
+	Float32Min     = numericMin[float32]
+	Float64Min     = numericMin[float64]
+	Int8MinSels    = numericMinSels[int8]
+	Int16MinSels   = numericMinSels[int16]
+	Int32MinSels   = numericMinSels[int32]
+	Int64MinSels   = numericMinSels[int64]
+	Uint8MinSels   = numericMinSels[uint8]
+	Uint16MinSels  = numericMinSels[uint16]
+	Uint32MinSels  = numericMinSels[uint32]
+	Uint64MinSels  = numericMinSels[uint64]
+	Float32MinSels = numericMinSels[float32]
+	Float64MinSels = numericMinSels[float64]
+
+	StrMin     = strMin
+	StrMinSels = strMinSels
 )
 
 func boolMin(xs []bool) bool {
@@ -65,7 +68,7 @@ func boolMinSels(xs []bool, sels []int64) bool {
 	return true
 }
 
-func int8Min(xs []int8) int8 {
+func numericMin[T constraints.Integer | constraints.Float](xs []T) T {
 	res := xs[0]
 	for _, x := range xs {
 		if x < res {
@@ -75,196 +78,7 @@ func int8Min(xs []int8) int8 {
 	return res
 }
 
-func int8MinSels(xs []int8, sels []int64) int8 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int16Min(xs []int16) int16 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int16MinSels(xs []int16, sels []int64) int16 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int32Min(xs []int32) int32 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int32MinSels(xs []int32, sels []int64) int32 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int64Min(xs []int64) int64 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func int64MinSels(xs []int64, sels []int64) int64 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint8Min(xs []uint8) uint8 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint8MinSels(xs []uint8, sels []int64) uint8 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint16Min(xs []uint16) uint16 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint16MinSels(xs []uint16, sels []int64) uint16 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint32Min(xs []uint32) uint32 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint32MinSels(xs []uint32, sels []int64) uint32 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint64Min(xs []uint64) uint64 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func uint64MinSels(xs []uint64, sels []int64) uint64 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float32Min(xs []float32) float32 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float32MinSels(xs []float32, sels []int64) float32 {
-	res := xs[sels[0]]
-	for _, sel := range sels {
-		x := xs[sel]
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float64Min(xs []float64) float64 {
-	res := xs[0]
-	for _, x := range xs {
-		if x < res {
-			res = x
-		}
-	}
-	return res
-}
-
-func float64MinSels(xs []float64, sels []int64) float64 {
+func numericMinSels[T constraints.Integer | constraints.Float](xs []T, sels []int64) T {
 	res := xs[sels[0]]
 	for _, sel := range sels {
 		x := xs[sel]
diff --git a/pkg/vectorize/min/min_amd64.go b/pkg/vectorize/min/min_amd64.go
index 9870c9cf91beaf6e4315c18c477b49bf37cd3666..7cbccf986ed78fc4a905960bcc24e00f689ba63e 100644
--- a/pkg/vectorize/min/min_amd64.go
+++ b/pkg/vectorize/min/min_amd64.go
@@ -12,9 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:build amd64
-// +build amd64
-
 package min
 
 import (
@@ -56,41 +53,12 @@ func init() {
 		Int8Min = int8MinAvx2
 		Int16Min = int16MinAvx2
 		Int32Min = int32MinAvx2
-		Int64Min = int64Min
 		Uint8Min = uint8MinAvx2
 		Uint16Min = uint16MinAvx2
 		Uint32Min = uint32MinAvx2
-		Uint64Min = uint64Min
 		Float32Min = float32MinAvx2
 		Float64Min = float64MinAvx2
-	} else {
-		Int8Min = int8Min
-		Int16Min = int16Min
-		Int32Min = int32Min
-		Int64Min = int64Min
-		Uint8Min = uint8Min
-		Uint16Min = uint16Min
-		Uint32Min = uint32Min
-		Uint64Min = uint64Min
-		Float32Min = float32Min
-		Float64Min = float64Min
 	}
-
-	BoolMin = boolMin
-	StrMin = strMin
-
-	BoolMinSels = boolMinSels
-	Int8MinSels = int8MinSels
-	Int16MinSels = int16MinSels
-	Int32MinSels = int32MinSels
-	Int64MinSels = int64MinSels
-	Uint8MinSels = uint8MinSels
-	Uint16MinSels = uint16MinSels
-	Uint32MinSels = uint32MinSels
-	Uint64MinSels = uint64MinSels
-	Float32MinSels = float32MinSels
-	Float64MinSels = float64MinSels
-	StrMinSels = strMinSels
 }
 
 func int8MinAvx2(xs []int8) int8 {
diff --git a/pkg/vectorize/min/min_arm64.go b/pkg/vectorize/min/min_arm64.go
deleted file mode 100644
index d566e4acba107d57c67c78bb4964bb4b9de107c6..0000000000000000000000000000000000000000
--- a/pkg/vectorize/min/min_arm64.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright 2021 Matrix Origin
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build arm64
-// +build arm64
-
-package min
-
-func init() {
-	BoolMin = boolMin
-	Int8Min = int8Min
-	Int16Min = int16Min
-	Int32Min = int32Min
-	Int64Min = int64Min
-	Uint8Min = uint8Min
-	Uint16Min = uint16Min
-	Uint32Min = uint32Min
-	Uint64Min = uint64Min
-	Float32Min = float32Min
-	Float64Min = float64Min
-	StrMin = strMin
-
-	BoolMinSels = boolMinSels
-	Int8MinSels = int8MinSels
-	Int16MinSels = int16MinSels
-	Int32MinSels = int32MinSels
-	Int64MinSels = int64MinSels
-	Uint8MinSels = uint8MinSels
-	Uint16MinSels = uint16MinSels
-	Uint32MinSels = uint32MinSels
-	Uint64MinSels = uint64MinSels
-	Float32MinSels = float32MinSels
-	Float64MinSels = float64MinSels
-	StrMinSels = strMinSels
-}
diff --git a/pkg/vectorize/min/avx2.s b/pkg/vectorize/min/min_avx2_amd64.s
similarity index 99%
rename from pkg/vectorize/min/avx2.s
rename to pkg/vectorize/min/min_avx2_amd64.s
index 22a04a86001d2a742df6ce9a7e9ccdb063d36a79..f5e2c94df165394f32ba3a3fe45e4a16a647579f 100644
--- a/pkg/vectorize/min/avx2.s
+++ b/pkg/vectorize/min/min_avx2_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/min/avx512.s b/pkg/vectorize/min/min_avx512_amd64.s
similarity index 99%
rename from pkg/vectorize/min/avx512.s
rename to pkg/vectorize/min/min_avx512_amd64.s
index 19887c69cccd2b905fa98d5309a1f051c6b9412a..219907e38306727874b64dea308d8dcf96b01da4 100644
--- a/pkg/vectorize/min/avx512.s
+++ b/pkg/vectorize/min/min_avx512_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/mod/mod.go b/pkg/vectorize/mod/mod.go
index 08a726fce08a07fd56bf255cf522d319defdd113..4cb193167d657bb0e76dfbf8e530edc68749eaf0 100644
--- a/pkg/vectorize/mod/mod.go
+++ b/pkg/vectorize/mod/mod.go
@@ -14,546 +14,149 @@
 
 package mod
 
+import "golang.org/x/exp/constraints"
+
 var (
-	Int8Mod                func([]int8, []int8, []int8) []int8
-	Int8ModSels            func([]int8, []int8, []int8, []int64) []int8
-	Int8ModScalar          func(int8, []int8, []int8) []int8
-	Int8ModScalarSels      func(int8, []int8, []int8, []int64) []int8
-	Int8ModByScalar        func(int8, []int8, []int8) []int8
-	Int8ModByScalarSels    func(int8, []int8, []int8, []int64) []int8
-	Int16Mod               func([]int16, []int16, []int16) []int16
-	Int16ModSels           func([]int16, []int16, []int16, []int64) []int16
-	Int16ModScalar         func(int16, []int16, []int16) []int16
-	Int16ModScalarSels     func(int16, []int16, []int16, []int64) []int16
-	Int16ModByScalar       func(int16, []int16, []int16) []int16
-	Int16ModByScalarSels   func(int16, []int16, []int16, []int64) []int16
-	Int32Mod               func([]int32, []int32, []int32) []int32
-	Int32ModSels           func([]int32, []int32, []int32, []int64) []int32
-	Int32ModScalar         func(int32, []int32, []int32) []int32
-	Int32ModScalarSels     func(int32, []int32, []int32, []int64) []int32
-	Int32ModByScalar       func(int32, []int32, []int32) []int32
-	Int32ModByScalarSels   func(int32, []int32, []int32, []int64) []int32
-	Int64Mod               func([]int64, []int64, []int64) []int64
-	Int64ModSels           func([]int64, []int64, []int64, []int64) []int64
-	Int64ModScalar         func(int64, []int64, []int64) []int64
-	Int64ModScalarSels     func(int64, []int64, []int64, []int64) []int64
-	Int64ModByScalar       func(int64, []int64, []int64) []int64
-	Int64ModByScalarSels   func(int64, []int64, []int64, []int64) []int64
-	Uint8Mod               func([]uint8, []uint8, []uint8) []uint8
-	Uint8ModSels           func([]uint8, []uint8, []uint8, []int64) []uint8
-	Uint8ModScalar         func(uint8, []uint8, []uint8) []uint8
-	Uint8ModScalarSels     func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint8ModByScalar       func(uint8, []uint8, []uint8) []uint8
-	Uint8ModByScalarSels   func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint16Mod              func([]uint16, []uint16, []uint16) []uint16
-	Uint16ModSels          func([]uint16, []uint16, []uint16, []int64) []uint16
-	Uint16ModScalar        func(uint16, []uint16, []uint16) []uint16
-	Uint16ModScalarSels    func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint16ModByScalar      func(uint16, []uint16, []uint16) []uint16
-	Uint16ModByScalarSels  func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint32Mod              func([]uint32, []uint32, []uint32) []uint32
-	Uint32ModSels          func([]uint32, []uint32, []uint32, []int64) []uint32
-	Uint32ModScalar        func(uint32, []uint32, []uint32) []uint32
-	Uint32ModScalarSels    func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint32ModByScalar      func(uint32, []uint32, []uint32) []uint32
-	Uint32ModByScalarSels  func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint64Mod              func([]uint64, []uint64, []uint64) []uint64
-	Uint64ModSels          func([]uint64, []uint64, []uint64, []int64) []uint64
-	Uint64ModScalar        func(uint64, []uint64, []uint64) []uint64
-	Uint64ModScalarSels    func(uint64, []uint64, []uint64, []int64) []uint64
-	Uint64ModByScalar      func(uint64, []uint64, []uint64) []uint64
-	Uint64ModByScalarSels  func(uint64, []uint64, []uint64, []int64) []uint64
-	Float32Mod             func([]float32, []float32, []float32) []float32
-	Float32ModSels         func([]float32, []float32, []float32, []int64) []float32
-	Float32ModScalar       func(float32, []float32, []float32) []float32
-	Float32ModScalarSels   func(float32, []float32, []float32, []int64) []float32
-	Float32ModByScalar     func(float32, []float32, []float32) []float32
-	Float32ModByScalarSels func(float32, []float32, []float32, []int64) []float32
-	Float64Mod             func([]float64, []float64, []float64) []float64
-	Float64ModSels         func([]float64, []float64, []float64, []int64) []float64
-	Float64ModScalar       func(float64, []float64, []float64) []float64
-	Float64ModScalarSels   func(float64, []float64, []float64, []int64) []float64
-	Float64ModByScalar     func(float64, []float64, []float64) []float64
-	Float64ModByScalarSels func(float64, []float64, []float64, []int64) []float64
+	Int8Mod                = intMod[int8]
+	Int8ModSels            = intModSels[int8]
+	Int8ModScalar          = intModScalar[int8]
+	Int8ModScalarSels      = intModScalarSels[int8]
+	Int8ModByScalar        = intModByScalar[int8]
+	Int8ModByScalarSels    = intModByScalarSels[int8]
+	Int16Mod               = intMod[int16]
+	Int16ModSels           = intModSels[int16]
+	Int16ModScalar         = intModScalar[int16]
+	Int16ModScalarSels     = intModScalarSels[int16]
+	Int16ModByScalar       = intModByScalar[int16]
+	Int16ModByScalarSels   = intModByScalarSels[int16]
+	Int32Mod               = intMod[int32]
+	Int32ModSels           = intModSels[int32]
+	Int32ModScalar         = intModScalar[int32]
+	Int32ModScalarSels     = intModScalarSels[int32]
+	Int32ModByScalar       = intModByScalar[int32]
+	Int32ModByScalarSels   = intModByScalarSels[int32]
+	Int64Mod               = intMod[int64]
+	Int64ModSels           = intModSels[int64]
+	Int64ModScalar         = intModScalar[int64]
+	Int64ModScalarSels     = intModScalarSels[int64]
+	Int64ModByScalar       = intModByScalar[int64]
+	Int64ModByScalarSels   = intModByScalarSels[int64]
+	Uint8Mod               = intMod[uint8]
+	Uint8ModSels           = intModSels[uint8]
+	Uint8ModScalar         = intModScalar[uint8]
+	Uint8ModScalarSels     = intModScalarSels[uint8]
+	Uint8ModByScalar       = intModByScalar[uint8]
+	Uint8ModByScalarSels   = intModByScalarSels[uint8]
+	Uint16Mod              = intMod[uint16]
+	Uint16ModSels          = intModSels[uint16]
+	Uint16ModScalar        = intModScalar[uint16]
+	Uint16ModScalarSels    = intModScalarSels[uint16]
+	Uint16ModByScalar      = intModByScalar[uint16]
+	Uint16ModByScalarSels  = intModByScalarSels[uint16]
+	Uint32Mod              = intMod[uint32]
+	Uint32ModSels          = intModSels[uint32]
+	Uint32ModScalar        = intModScalar[uint32]
+	Uint32ModScalarSels    = intModScalarSels[uint32]
+	Uint32ModByScalar      = intModByScalar[uint32]
+	Uint32ModByScalarSels  = intModByScalarSels[uint32]
+	Uint64Mod              = intMod[uint64]
+	Uint64ModSels          = intModSels[uint64]
+	Uint64ModScalar        = intModScalar[uint64]
+	Uint64ModScalarSels    = intModScalarSels[uint64]
+	Uint64ModByScalar      = intModByScalar[uint64]
+	Uint64ModByScalarSels  = intModByScalarSels[uint64]
+	Float32Mod             = floatMod[float32]
+	Float32ModSels         = floatModSels[float32]
+	Float32ModScalar       = floatModScalar[float32]
+	Float32ModScalarSels   = floatModScalarSels[float32]
+	Float32ModByScalar     = floatModByScalar[float32]
+	Float32ModByScalarSels = floatModByScalarSels[float32]
+	Float64Mod             = floatMod[float64]
+	Float64ModSels         = floatModSels[float64]
+	Float64ModScalar       = floatModScalar[float64]
+	Float64ModScalarSels   = floatModScalarSels[float64]
+	Float64ModByScalar     = floatModByScalar[float64]
+	Float64ModByScalarSels = floatModByScalarSels[float64]
 )
 
-func init() {
-	Int8Mod = int8Mod
-	Int8ModSels = int8ModSels
-	Int8ModScalar = int8ModScalar
-	Int8ModScalarSels = int8ModScalarSels
-	Int8ModByScalar = int8ModByScalar
-	Int8ModByScalarSels = int8ModByScalarSels
-	Int16Mod = int16Mod
-	Int16ModSels = int16ModSels
-	Int16ModScalar = int16ModScalar
-	Int16ModScalarSels = int16ModScalarSels
-	Int16ModByScalar = int16ModByScalar
-	Int16ModByScalarSels = int16ModByScalarSels
-	Int32Mod = int32Mod
-	Int32ModSels = int32ModSels
-	Int32ModScalar = int32ModScalar
-	Int32ModScalarSels = int32ModScalarSels
-	Int32ModByScalar = int32ModByScalar
-	Int32ModByScalarSels = int32ModByScalarSels
-	Int64Mod = int64Mod
-	Int64ModSels = int64ModSels
-	Int64ModScalar = int64ModScalar
-	Int64ModScalarSels = int64ModScalarSels
-	Int64ModByScalar = int64ModByScalar
-	Int64ModByScalarSels = int64ModByScalarSels
-	Uint8Mod = uint8Mod
-	Uint8ModSels = uint8ModSels
-	Uint8ModScalar = uint8ModScalar
-	Uint8ModScalarSels = uint8ModScalarSels
-	Uint8ModByScalar = uint8ModByScalar
-	Uint8ModByScalarSels = uint8ModByScalarSels
-	Uint16Mod = uint16Mod
-	Uint16ModSels = uint16ModSels
-	Uint16ModScalar = uint16ModScalar
-	Uint16ModScalarSels = uint16ModScalarSels
-	Uint16ModByScalar = uint16ModByScalar
-	Uint16ModByScalarSels = uint16ModByScalarSels
-	Uint32Mod = uint32Mod
-	Uint32ModSels = uint32ModSels
-	Uint32ModScalar = uint32ModScalar
-	Uint32ModScalarSels = uint32ModScalarSels
-	Uint32ModByScalar = uint32ModByScalar
-	Uint32ModByScalarSels = uint32ModByScalarSels
-	Uint64Mod = uint64Mod
-	Uint64ModSels = uint64ModSels
-	Uint64ModScalar = uint64ModScalar
-	Uint64ModScalarSels = uint64ModScalarSels
-	Uint64ModByScalar = uint64ModByScalar
-	Uint64ModByScalarSels = uint64ModByScalarSels
-	Float32Mod = float32Mod
-	Float32ModSels = float32ModSels
-	Float32ModScalar = float32ModScalar
-	Float32ModScalarSels = float32ModScalarSels
-	Float32ModByScalar = float32ModByScalar
-	Float32ModByScalarSels = float32ModByScalarSels
-	Float64Mod = float64Mod
-	Float64ModSels = float64ModSels
-	Float64ModScalar = float64ModScalar
-	Float64ModScalarSels = float64ModScalarSels
-	Float64ModByScalar = float64ModByScalar
-	Float64ModByScalarSels = float64ModByScalarSels
-}
-
-func int8Mod(xs, ys, rs []int8) []int8 {
-	for i, x := range xs {
-		rs[i] = x % ys[i]
-	}
-	return rs
-}
-
-func int8ModSels(xs, ys, rs []int8, sels []int64) []int8 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] % ys[sel]
-	}
-	return rs
-}
-
-func int8ModScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = x % y
-	}
-	return rs
-}
-
-func int8ModScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for _, sel := range sels {
-		rs[sel] = x % ys[sel]
-	}
-	return rs
-}
-
-func int8ModByScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = y % x
-	}
-	return rs
-}
-
-func int8ModByScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] % x
-	}
-	return rs
-}
-
-func int16Mod(xs, ys, rs []int16) []int16 {
-	for i, x := range xs {
-		rs[i] = x % ys[i]
-	}
-	return rs
-}
-
-func int16ModSels(xs, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] % ys[sel]
-	}
-	return rs
-}
-
-func int16ModScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = x % y
-	}
-	return rs
-}
-
-func int16ModScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = x % ys[sel]
-	}
-	return rs
-}
-
-func int16ModByScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = y % x
-	}
-	return rs
-}
-
-func int16ModByScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] % x
-	}
-	return rs
-}
-
-func int32Mod(xs, ys, rs []int32) []int32 {
+func intMod[T constraints.Integer](xs, ys, rs []T) []T {
 	for i, x := range xs {
 		rs[i] = x % ys[i]
 	}
 	return rs
 }
 
-func int32ModSels(xs, ys, rs []int32, sels []int64) []int32 {
+func intModSels[T constraints.Integer](xs, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = xs[sel] % ys[sel]
 	}
 	return rs
 }
 
-func int32ModScalar(x int32, ys, rs []int32) []int32 {
+func intModScalar[T constraints.Integer](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = x % y
 	}
 	return rs
 }
 
-func int32ModScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
+func intModScalarSels[T constraints.Integer](x T, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = x % ys[sel]
 	}
 	return rs
 }
 
-func int32ModByScalar(x int32, ys, rs []int32) []int32 {
+func intModByScalar[T constraints.Integer](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = y % x
 	}
 	return rs
 }
 
-func int32ModByScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
+func intModByScalarSels[T constraints.Integer](x T, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = ys[sel] % x
 	}
 	return rs
 }
 
-func int64Mod(xs, ys, rs []int64) []int64 {
-	for i, x := range xs {
-		rs[i] = x % ys[i]
-	}
-	return rs
-}
-
-func int64ModSels(xs, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] % ys[sel]
-	}
-	return rs
-}
-
-func int64ModScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = x % y
-	}
-	return rs
-}
-
-func int64ModScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = x % ys[sel]
-	}
-	return rs
-}
-
-func int64ModByScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = y % x
-	}
-	return rs
-}
-
-func int64ModByScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] % x
-	}
-	return rs
-}
-
-func uint8Mod(xs, ys, rs []uint8) []uint8 {
-	for i, x := range xs {
-		rs[i] = x % ys[i]
-	}
-	return rs
-}
-
-func uint8ModSels(xs, ys, rs []uint8, sels []int64) []uint8 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] % ys[sel]
-	}
-	return rs
-}
-
-func uint8ModScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = x % y
-	}
-	return rs
-}
-
-func uint8ModScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for _, sel := range sels {
-		rs[sel] = x % ys[sel]
-	}
-	return rs
-}
-
-func uint8ModByScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = y % x
-	}
-	return rs
-}
-
-func uint8ModByScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] % x
-	}
-	return rs
-}
-
-func uint16Mod(xs, ys, rs []uint16) []uint16 {
-	for i, x := range xs {
-		rs[i] = x % ys[i]
-	}
-	return rs
-}
-
-func uint16ModSels(xs, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] % ys[sel]
-	}
-	return rs
-}
-
-func uint16ModScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = x % y
-	}
-	return rs
-}
-
-func uint16ModScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = x % ys[sel]
-	}
-	return rs
-}
-
-func uint16ModByScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = y % x
-	}
-	return rs
-}
-
-func uint16ModByScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] % x
-	}
-	return rs
-}
-
-func uint32Mod(xs, ys, rs []uint32) []uint32 {
-	for i, x := range xs {
-		rs[i] = x % ys[i]
-	}
-	return rs
-}
-
-func uint32ModSels(xs, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] % ys[sel]
-	}
-	return rs
-}
-
-func uint32ModScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = x % y
-	}
-	return rs
-}
-
-func uint32ModScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = x % ys[sel]
-	}
-	return rs
-}
-
-func uint32ModByScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = y % x
-	}
-	return rs
-}
-
-func uint32ModByScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] % x
-	}
-	return rs
-}
-
-func uint64Mod(xs, ys, rs []uint64) []uint64 {
-	for i, x := range xs {
-		rs[i] = x % ys[i]
-	}
-	return rs
-}
-
-func uint64ModSels(xs, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] % ys[sel]
-	}
-	return rs
-}
-
-func uint64ModScalar(x uint64, ys, rs []uint64) []uint64 {
-	for i, y := range ys {
-		rs[i] = x % y
-	}
-	return rs
-}
-
-func uint64ModScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = x % ys[sel]
-	}
-	return rs
-}
-
-func uint64ModByScalar(x uint64, ys, rs []uint64) []uint64 {
-	for i, y := range ys {
-		rs[i] = y % x
-	}
-	return rs
-}
-
-func uint64ModByScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] % x
-	}
-	return rs
-}
-
-func float32Mod(xs, ys, rs []float32) []float32 {
-	for i, x := range xs {
-		rs[i] = x - x/ys[i]*ys[i]
-	}
-	return rs
-}
-
-func float32ModSels(xs, ys, rs []float32, sels []int64) []float32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - xs[sel]/ys[sel]*ys[sel]
-	}
-	return rs
-}
-
-func float32ModScalar(x float32, ys, rs []float32) []float32 {
-	for i, y := range ys {
-		rs[i] = x - x/y*y
-	}
-	return rs
-}
-
-func float32ModScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
-	for _, sel := range sels {
-		rs[sel] = x - x/ys[sel]*ys[sel]
-	}
-	return rs
-}
-
-func float32ModByScalar(x float32, ys, rs []float32) []float32 {
-	for i, y := range ys {
-		rs[i] = y - y/x*x
-	}
-	return rs
-}
-
-func float32ModByScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] - ys[sel]/x*x
-	}
-	return rs
-}
-
-func float64Mod(xs, ys, rs []float64) []float64 {
+func floatMod[T constraints.Float](xs, ys, rs []T) []T {
 	for i, x := range xs {
 		rs[i] = x - x/ys[i]*ys[i]
 	}
 	return rs
 }
 
-func float64ModSels(xs, ys, rs []float64, sels []int64) []float64 {
+func floatModSels[T constraints.Float](xs, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = xs[sel] - xs[sel]/ys[sel]*ys[sel]
 	}
 	return rs
 }
 
-func float64ModScalar(x float64, ys, rs []float64) []float64 {
+func floatModScalar[T constraints.Float](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = x - x/y*y
 	}
 	return rs
 }
 
-func float64ModScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func floatModScalarSels[T constraints.Float](x T, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = x - x/ys[sel]*ys[sel]
 	}
 	return rs
 }
 
-func float64ModByScalar(x float64, ys, rs []float64) []float64 {
+func floatModByScalar[T constraints.Float](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = y - y/x*x
 	}
 	return rs
 }
 
-func float64ModByScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func floatModByScalarSels[T constraints.Float](x T, ys, rs []T, sels []int64) []T {
 	for _, sel := range sels {
 		rs[sel] = ys[sel] - ys[sel]/x*x
 	}
diff --git a/pkg/vectorize/mul/mul.go b/pkg/vectorize/mul/mul.go
index d43281fcf3d27b2560f902d235b42b180f6fb844..4481ee8f6dac98885aa13f6cd8e4a6f7805c02b5 100644
--- a/pkg/vectorize/mul/mul.go
+++ b/pkg/vectorize/mul/mul.go
@@ -14,625 +14,147 @@
 
 package mul
 
-import "github.com/matrixorigin/matrixone/pkg/container/types"
-
-var (
-	Int8Mul                 func([]int8, []int8, []int8) []int8
-	Int8MulSels             func([]int8, []int8, []int8, []int64) []int8
-	Int8MulScalar           func(int8, []int8, []int8) []int8
-	Int8MulScalarSels       func(int8, []int8, []int8, []int64) []int8
-	Int16Mul                func([]int16, []int16, []int16) []int16
-	Int16MulSels            func([]int16, []int16, []int16, []int64) []int16
-	Int16MulScalar          func(int16, []int16, []int16) []int16
-	Int16MulScalarSels      func(int16, []int16, []int16, []int64) []int16
-	Int32Mul                func([]int32, []int32, []int32) []int32
-	Int32MulSels            func([]int32, []int32, []int32, []int64) []int32
-	Int32MulScalar          func(int32, []int32, []int32) []int32
-	Int32MulScalarSels      func(int32, []int32, []int32, []int64) []int32
-	Int64Mul                func([]int64, []int64, []int64) []int64
-	Int64MulSels            func([]int64, []int64, []int64, []int64) []int64
-	Int64MulScalar          func(int64, []int64, []int64) []int64
-	Int64MulScalarSels      func(int64, []int64, []int64, []int64) []int64
-	Uint8Mul                func([]uint8, []uint8, []uint8) []uint8
-	Uint8MulSels            func([]uint8, []uint8, []uint8, []int64) []uint8
-	Uint8MulScalar          func(uint8, []uint8, []uint8) []uint8
-	Uint8MulScalarSels      func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint16Mul               func([]uint16, []uint16, []uint16) []uint16
-	Uint16MulSels           func([]uint16, []uint16, []uint16, []int64) []uint16
-	Uint16MulScalar         func(uint16, []uint16, []uint16) []uint16
-	Uint16MulScalarSels     func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint32Mul               func([]uint32, []uint32, []uint32) []uint32
-	Uint32MulSels           func([]uint32, []uint32, []uint32, []int64) []uint32
-	Uint32MulScalar         func(uint32, []uint32, []uint32) []uint32
-	Uint32MulScalarSels     func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint64Mul               func([]uint64, []uint64, []uint64) []uint64
-	Uint64MulSels           func([]uint64, []uint64, []uint64, []int64) []uint64
-	Uint64MulScalar         func(uint64, []uint64, []uint64) []uint64
-	Uint64MulScalarSels     func(uint64, []uint64, []uint64, []int64) []uint64
-	Float32Mul              func([]float32, []float32, []float32) []float32
-	Float32MulSels          func([]float32, []float32, []float32, []int64) []float32
-	Float32MulScalar        func(float32, []float32, []float32) []float32
-	Float32MulScalarSels    func(float32, []float32, []float32, []int64) []float32
-	Float64Mul              func([]float64, []float64, []float64) []float64
-	Float64MulSels          func([]float64, []float64, []float64, []int64) []float64
-	Float64MulScalar        func(float64, []float64, []float64) []float64
-	Float64MulScalarSels    func(float64, []float64, []float64, []int64) []float64
-	Decimal64Mul            func([]types.Decimal64, []types.Decimal64, []types.Decimal128) []types.Decimal128
-	Decimal64MulSels        func([]types.Decimal64, []types.Decimal64, []types.Decimal128, []int64) []types.Decimal128
-	Decimal64MulScalar      func(types.Decimal64, []types.Decimal64, []types.Decimal128) []types.Decimal128
-	Decimal64MulScalarSels  func(types.Decimal64, []types.Decimal64, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128Mul           func([]types.Decimal128, []types.Decimal128, []types.Decimal128) []types.Decimal128
-	Decimal128MulSels       func([]types.Decimal128, []types.Decimal128, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128MulScalar     func(types.Decimal128, []types.Decimal128, []types.Decimal128) []types.Decimal128
-	Decimal128MulScalarSels func(types.Decimal128, []types.Decimal128, []types.Decimal128, []int64) []types.Decimal128
-
-	Int32Int64Mul         func([]int32, []int64, []int64) []int64
-	Int32Int64MulSels     func([]int32, []int64, []int64, []int64) []int64
-	Int16Int64Mul         func([]int16, []int64, []int64) []int64
-	Int16Int64MulSels     func([]int16, []int64, []int64, []int64) []int64
-	Int8Int64Mul          func([]int8, []int64, []int64) []int64
-	Int8Int64MulSels      func([]int8, []int64, []int64, []int64) []int64
-	Int16Int32Mul         func([]int16, []int32, []int32) []int32
-	Int16Int32MulSels     func([]int16, []int32, []int32, []int64) []int32
-	Int8Int32Mul          func([]int8, []int32, []int32) []int32
-	Int8Int32MulSels      func([]int8, []int32, []int32, []int64) []int32
-	Int8Int16Mul          func([]int8, []int16, []int16) []int16
-	Int8Int16MulSels      func([]int8, []int16, []int16, []int64) []int16
-	Float32Float64Mul     func([]float32, []float64, []float64) []float64
-	Float32Float64MulSels func([]float32, []float64, []float64, []int64) []float64
-	Uint32Uint64Mul       func([]uint32, []uint64, []uint64) []uint64
-	Uint32Uint64MulSels   func([]uint32, []uint64, []uint64, []int64) []uint64
-	Uint16Uint64Mul       func([]uint16, []uint64, []uint64) []uint64
-	Uint16Uint64MulSels   func([]uint16, []uint64, []uint64, []int64) []uint64
-	Uint8Uint64Mul        func([]uint8, []uint64, []uint64) []uint64
-	Uint8Uint64MulSels    func([]uint8, []uint64, []uint64, []int64) []uint64
-	Uint16Uint32Mul       func([]uint16, []uint32, []uint32) []uint32
-	Uint16Uint32MulSels   func([]uint16, []uint32, []uint32, []int64) []uint32
-	Uint8Uint32Mul        func([]uint8, []uint32, []uint32) []uint32
-	Uint8Uint32MulSels    func([]uint8, []uint32, []uint32, []int64) []uint32
-	Uint8Uint16Mul        func([]uint8, []uint16, []uint16) []uint16
-	Uint8Uint16MulSels    func([]uint8, []uint16, []uint16, []int64) []uint16
+import (
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
 )
 
-func init() {
-	Int8Mul = int8Mul
-	Int8MulSels = int8MulSels
-	Int8MulScalar = int8MulScalar
-	Int8MulScalarSels = int8MulScalarSels
-	Int16Mul = int16Mul
-	Int16MulSels = int16MulSels
-	Int16MulScalar = int16MulScalar
-	Int16MulScalarSels = int16MulScalarSels
-	Int32Mul = int32Mul
-	Int32MulSels = int32MulSels
-	Int32MulScalar = int32MulScalar
-	Int32MulScalarSels = int32MulScalarSels
-	Int64Mul = int64Mul
-	Int64MulSels = int64MulSels
-	Int64MulScalar = int64MulScalar
-	Int64MulScalarSels = int64MulScalarSels
-	Uint8Mul = uint8Mul
-	Uint8MulSels = uint8MulSels
-	Uint8MulScalar = uint8MulScalar
-	Uint8MulScalarSels = uint8MulScalarSels
-	Uint16Mul = uint16Mul
-	Uint16MulSels = uint16MulSels
-	Uint16MulScalar = uint16MulScalar
-	Uint16MulScalarSels = uint16MulScalarSels
-	Uint32Mul = uint32Mul
-	Uint32MulSels = uint32MulSels
-	Uint32MulScalar = uint32MulScalar
-	Uint32MulScalarSels = uint32MulScalarSels
-	Uint64Mul = uint64Mul
-	Uint64MulSels = uint64MulSels
-	Uint64MulScalar = uint64MulScalar
-	Uint64MulScalarSels = uint64MulScalarSels
-	Float32Mul = float32Mul
-	Float32MulSels = float32MulSels
-	Float32MulScalar = float32MulScalar
-	Float32MulScalarSels = float32MulScalarSels
-	Float64Mul = float64Mul
-	Float64MulSels = float64MulSels
-	Float64MulScalar = float64MulScalar
-	Float64MulScalarSels = float64MulScalarSels
-	Decimal64Mul = decimal64Mul
-	Decimal64MulSels = decimal64MulSels
-	Decimal64MulScalar = decimal64MulScalar
-	Decimal64MulScalarSels = decimal64MulScalarSels
-	Decimal128Mul = decimal128Mul
-	Decimal128MulSels = decimal128MulSels
-	Decimal128MulScalar = decimal128MulScalar
+var (
+	Int8Mul              = numericMul[int8]
+	Int8MulSels          = numericMulSels[int8]
+	Int8MulScalar        = numericMulScalar[int8]
+	Int8MulScalarSels    = numericMulScalarSels[int8]
+	Int16Mul             = numericMul[int16]
+	Int16MulSels         = numericMulSels[int16]
+	Int16MulScalar       = numericMulScalar[int16]
+	Int16MulScalarSels   = numericMulScalarSels[int16]
+	Int32Mul             = numericMul[int32]
+	Int32MulSels         = numericMulSels[int32]
+	Int32MulScalar       = numericMulScalar[int32]
+	Int32MulScalarSels   = numericMulScalarSels[int32]
+	Int64Mul             = numericMul[int64]
+	Int64MulSels         = numericMulSels[int64]
+	Int64MulScalar       = numericMulScalar[int64]
+	Int64MulScalarSels   = numericMulScalarSels[int64]
+	Uint8Mul             = numericMul[uint8]
+	Uint8MulSels         = numericMulSels[uint8]
+	Uint8MulScalar       = numericMulScalar[uint8]
+	Uint8MulScalarSels   = numericMulScalarSels[uint8]
+	Uint16Mul            = numericMul[uint16]
+	Uint16MulSels        = numericMulSels[uint16]
+	Uint16MulScalar      = numericMulScalar[uint16]
+	Uint16MulScalarSels  = numericMulScalarSels[uint16]
+	Uint32Mul            = numericMul[uint32]
+	Uint32MulSels        = numericMulSels[uint32]
+	Uint32MulScalar      = numericMulScalar[uint32]
+	Uint32MulScalarSels  = numericMulScalarSels[uint32]
+	Uint64Mul            = numericMul[uint64]
+	Uint64MulSels        = numericMulSels[uint64]
+	Uint64MulScalar      = numericMulScalar[uint64]
+	Uint64MulScalarSels  = numericMulScalarSels[uint64]
+	Float32Mul           = numericMul[float32]
+	Float32MulSels       = numericMulSels[float32]
+	Float32MulScalar     = numericMulScalar[float32]
+	Float32MulScalarSels = numericMulScalarSels[float32]
+	Float64Mul           = numericMul[float64]
+	Float64MulSels       = numericMulSels[float64]
+	Float64MulScalar     = numericMulScalar[float64]
+	Float64MulScalarSels = numericMulScalarSels[float64]
+
+	Int32Int64Mul         = numericMul2[int32, int64]
+	Int32Int64MulSels     = numericMulSels2[int32, int64]
+	Int16Int64Mul         = numericMul2[int16, int64]
+	Int16Int64MulSels     = numericMulSels2[int16, int64]
+	Int8Int64Mul          = numericMul2[int8, int64]
+	Int8Int64MulSels      = numericMulSels2[int8, int64]
+	Int16Int32Mul         = numericMul2[int16, int32]
+	Int16Int32MulSels     = numericMulSels2[int16, int32]
+	Int8Int32Mul          = numericMul2[int8, int32]
+	Int8Int32MulSels      = numericMulSels2[int8, int32]
+	Int8Int16Mul          = numericMul2[int8, int16]
+	Int8Int16MulSels      = numericMulSels2[int8, int16]
+	Float32Float64Mul     = numericMul2[float32, float64]
+	Float32Float64MulSels = numericMulSels2[float32, float64]
+	Uint32Uint64Mul       = numericMul2[uint32, uint64]
+	Uint32Uint64MulSels   = numericMulSels2[uint32, uint64]
+	Uint16Uint64Mul       = numericMul2[uint16, uint64]
+	Uint16Uint64MulSels   = numericMulSels2[uint16, uint64]
+	Uint8Uint64Mul        = numericMul2[uint8, uint64]
+	Uint8Uint64MulSels    = numericMulSels2[uint8, uint64]
+	Uint16Uint32Mul       = numericMul2[uint16, uint32]
+	Uint16Uint32MulSels   = numericMulSels2[uint16, uint32]
+	Uint8Uint32Mul        = numericMul2[uint8, uint32]
+	Uint8Uint32MulSels    = numericMulSels2[uint8, uint32]
+	Uint8Uint16Mul        = numericMul2[uint8, uint16]
+	Uint8Uint16MulSels    = numericMulSels2[uint8, uint16]
+
+	Decimal64Mul            = decimal64Mul
+	Decimal64MulSels        = decimal64MulSels
+	Decimal64MulScalar      = decimal64MulScalar
+	Decimal64MulScalarSels  = decimal64MulScalarSels
+	Decimal128Mul           = decimal128Mul
+	Decimal128MulSels       = decimal128MulSels
+	Decimal128MulScalar     = decimal128MulScalar
 	Decimal128MulScalarSels = decimal128MulScalarSels
+)
 
-	Int32Int64Mul = int32Int64Mul
-	Int32Int64MulSels = int32Int64MulSels
-	Int16Int64Mul = int16Int64Mul
-	Int16Int64MulSels = int16Int64MulSels
-	Int8Int64Mul = int8Int64Mul
-	Int8Int64MulSels = int8Int64MulSels
-	Int16Int32Mul = int16Int32Mul
-	Int16Int32MulSels = int16Int32MulSels
-	Int8Int32Mul = int8Int32Mul
-	Int8Int32MulSels = int8Int32MulSels
-	Int8Int16Mul = int8Int16Mul
-	Int8Int16MulSels = int8Int16MulSels
-	Float32Float64Mul = float32Float64Mul
-	Float32Float64MulSels = float32Float64MulSels
-	Uint32Uint64Mul = uint32Uint64Mul
-	Uint32Uint64MulSels = uint32Uint64MulSels
-	Uint16Uint64Mul = uint16Uint64Mul
-	Uint16Uint64MulSels = uint16Uint64MulSels
-	Uint8Uint64Mul = uint8Uint64Mul
-	Uint8Uint64MulSels = uint8Uint64MulSels
-	Uint16Uint32Mul = uint16Uint32Mul
-	Uint16Uint32MulSels = uint16Uint32MulSels
-	Uint8Uint32Mul = uint8Uint32Mul
-	Uint8Uint32MulSels = uint8Uint32MulSels
-	Uint8Uint16Mul = uint8Uint16Mul
-	Uint8Uint16MulSels = uint8Uint16MulSels
-}
-
-func int8Mul(xs, ys, rs []int8) []int8 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func int8MulSels(xs, ys, rs []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func int8MulScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func int8MulScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func int16Mul(xs, ys, rs []int16) []int16 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func int16MulSels(xs, ys, rs []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func int16MulScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func int16MulScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func int32Mul(xs, ys, rs []int32) []int32 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func int32MulSels(xs, ys, rs []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func int32MulScalar(x int32, ys, rs []int32) []int32 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func int32MulScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func int64Mul(xs, ys, rs []int64) []int64 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func int64MulSels(xs, ys, rs []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func int64MulScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func int64MulScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func uint8Mul(xs, ys, rs []uint8) []uint8 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func uint8MulSels(xs, ys, rs []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func uint8MulScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func uint8MulScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func uint16Mul(xs, ys, rs []uint16) []uint16 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func uint16MulSels(xs, ys, rs []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func uint16MulScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func uint16MulScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func uint32Mul(xs, ys, rs []uint32) []uint32 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func uint32MulSels(xs, ys, rs []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func uint32MulScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func uint32MulScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func uint64Mul(xs, ys, rs []uint64) []uint64 {
-	for i, x := range xs {
-		rs[i] = x * ys[i]
-	}
-	return rs
-}
-
-func uint64MulSels(xs, ys, rs []uint64, sels []int64) []uint64 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
-	}
-	return rs
-}
-
-func uint64MulScalar(x uint64, ys, rs []uint64) []uint64 {
-	for i, y := range ys {
-		rs[i] = x * y
-	}
-	return rs
-}
-
-func uint64MulScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
-	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func float32Mul(xs, ys, rs []float32) []float32 {
+func numericMul[T constraints.Integer | constraints.Float](xs, ys, rs []T) []T {
 	for i, x := range xs {
 		rs[i] = x * ys[i]
 	}
 	return rs
 }
 
-func float32MulSels(xs, ys, rs []float32, sels []int64) []float32 {
+func numericMulSels[T constraints.Integer | constraints.Float](xs, ys, rs []T, sels []int64) []T {
 	for i, sel := range sels {
 		rs[i] = xs[sel] * ys[sel]
 	}
 	return rs
 }
 
-func float32MulScalar(x float32, ys, rs []float32) []float32 {
+func numericMulScalar[T constraints.Integer | constraints.Float](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = x * y
 	}
 	return rs
 }
 
-func float32MulScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
+func numericMulScalarSels[T constraints.Integer | constraints.Float](x T, ys, rs []T, sels []int64) []T {
 	for i, sel := range sels {
 		rs[i] = x * ys[sel]
 	}
 	return rs
 }
 
-func float64Mul(xs, ys, rs []float64) []float64 {
+func numericMul2[TSmall, TBig constraints.Integer | constraints.Float](xs []TSmall, ys, rs []TBig) []TBig {
 	for i, x := range xs {
-		rs[i] = x * ys[i]
+		rs[i] = TBig(x) * ys[i]
 	}
 	return rs
 }
 
-func float64MulSels(xs, ys, rs []float64, sels []int64) []float64 {
+func numericMulSels2[TSmall, TBig constraints.Integer | constraints.Float](xs []TSmall, ys, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = xs[sel] * ys[sel]
+		rs[i] = TBig(xs[sel]) * ys[sel]
 	}
 	return rs
 }
 
-func float64MulScalar(x float64, ys, rs []float64) []float64 {
+/*
+func numericMulScalar2[TSmall, TBig constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig) []TBig {
 	for i, y := range ys {
-		rs[i] = x * y
+		rs[i] = TBig(x) * y
 	}
 	return rs
 }
 
-func float64MulScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func numericMulScalarSels2[TSmall, TBig constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = x * ys[sel]
-	}
-	return rs
-}
-
-func int32Int64Mul(xs []int32, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = ys[i] * int64(xs[i])
-	}
-	return rs
-}
-
-func int32Int64MulSels(xs []int32, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * int64(xs[sel])
-	}
-	return rs
-}
-
-func int16Int64Mul(xs []int16, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = ys[i] * int64(xs[i])
-	}
-	return rs
-}
-
-func int16Int64MulSels(xs []int16, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * int64(xs[sel])
-	}
-	return rs
-}
-
-func int8Int64Mul(xs []int8, ys, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = ys[i] * int64(xs[i])
-	}
-	return rs
-}
-
-func int8Int64MulSels(xs []int8, ys, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * int64(xs[sel])
-	}
-	return rs
-}
-
-func int16Int32Mul(xs []int16, ys, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = ys[i] * int32(xs[i])
-	}
-	return rs
-}
-
-func int16Int32MulSels(xs []int16, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * int32(xs[sel])
-	}
-	return rs
-}
-
-func int8Int32Mul(xs []int8, ys, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = ys[i] * int32(xs[i])
-	}
-	return rs
-}
-
-func int8Int32MulSels(xs []int8, ys, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * int32(xs[sel])
-	}
-	return rs
-}
-
-func int8Int16Mul(xs []int8, ys, rs []int16) []int16 {
-	for i := range rs {
-		rs[i] = ys[i] * int16(xs[i])
-	}
-	return rs
-}
-
-func int8Int16MulSels(xs []int8, ys, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * int16(xs[sel])
-	}
-	return rs
-}
-
-func float32Float64Mul(xs []float32, ys, rs []float64) []float64 {
-	for i := range rs {
-		rs[i] = ys[i] * float64(xs[i])
-	}
-	return rs
-}
-
-func float32Float64MulSels(xs []float32, ys, rs []float64, sels []int64) []float64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * float64(xs[sel])
-	}
-	return rs
-}
-
-func uint32Uint64Mul(xs []uint32, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = ys[i] * uint64(xs[i])
-	}
-	return rs
-}
-
-func uint32Uint64MulSels(xs []uint32, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * uint64(xs[sel])
-	}
-	return rs
-}
-
-func uint16Uint64Mul(xs []uint16, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = ys[i] * uint64(xs[i])
-	}
-	return rs
-}
-
-func uint16Uint64MulSels(xs []uint16, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * uint64(xs[sel])
-	}
-	return rs
-}
-
-func uint8Uint64Mul(xs []uint8, ys, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = ys[i] * uint64(xs[i])
-	}
-	return rs
-}
-
-func uint8Uint64MulSels(xs []uint8, ys, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * uint64(xs[sel])
-	}
-	return rs
-}
-
-func uint16Uint32Mul(xs []uint16, ys, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = ys[i] * uint32(xs[i])
-	}
-	return rs
-}
-
-func uint16Uint32MulSels(xs []uint16, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * uint32(xs[sel])
-	}
-	return rs
-}
-
-func uint8Uint32Mul(xs []uint8, ys, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = ys[i] * uint32(xs[i])
-	}
-	return rs
-}
-
-func uint8Uint32MulSels(xs []uint8, ys, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * uint32(xs[sel])
-	}
-	return rs
-}
-
-func uint8Uint16Mul(xs []uint8, ys, rs []uint16) []uint16 {
-	for i := range rs {
-		rs[i] = ys[i] * uint16(xs[i])
-	}
-	return rs
-}
-
-func uint8Uint16MulSels(xs []uint8, ys, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = ys[sel] * uint16(xs[sel])
+		rs[i] = TBig(x) * ys[sel]
 	}
 	return rs
 }
+*/
 
 func decimal64Mul(xs []types.Decimal64, ys []types.Decimal64, rs []types.Decimal128) []types.Decimal128 {
 	for i, x := range xs {
diff --git a/pkg/vectorize/ne/ne.go b/pkg/vectorize/ne/ne.go
index 961065376c2910b0fce8c2f0e578989bdc5ca558..4c02f831cd9f88f23e4a43ba1c384d7ba7429ff6 100644
--- a/pkg/vectorize/ne/ne.go
+++ b/pkg/vectorize/ne/ne.go
@@ -16,1290 +16,124 @@ package ne
 
 import (
 	"bytes"
-	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"math"
 
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
+
 	roaring "github.com/RoaringBitmap/roaring/roaring64"
 )
 
 var (
-	Int8Ne                      func([]int8, []int8, []int64) []int64
-	Int8NeNullable              func([]int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8NeSels                  func([]int8, []int8, []int64, []int64) []int64
-	Int8NeNullableSels          func([]int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int8NeScalar                func(int8, []int8, []int64) []int64
-	Int8NeNullableScalar        func(int8, []int8, *roaring.Bitmap, []int64) []int64
-	Int8NeScalarSels            func(int8, []int8, []int64, []int64) []int64
-	Int8NeNullableScalarSels    func(int8, []int8, *roaring.Bitmap, []int64, []int64) []int64
-	Int16Ne                     func([]int16, []int16, []int64) []int64
-	Int16NeNullable             func([]int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16NeSels                 func([]int16, []int16, []int64, []int64) []int64
-	Int16NeNullableSels         func([]int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int16NeScalar               func(int16, []int16, []int64) []int64
-	Int16NeNullableScalar       func(int16, []int16, *roaring.Bitmap, []int64) []int64
-	Int16NeScalarSels           func(int16, []int16, []int64, []int64) []int64
-	Int16NeNullableScalarSels   func(int16, []int16, *roaring.Bitmap, []int64, []int64) []int64
-	Int32Ne                     func([]int32, []int32, []int64) []int64
-	Int32NeNullable             func([]int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32NeSels                 func([]int32, []int32, []int64, []int64) []int64
-	Int32NeNullableSels         func([]int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int32NeScalar               func(int32, []int32, []int64) []int64
-	Int32NeNullableScalar       func(int32, []int32, *roaring.Bitmap, []int64) []int64
-	Int32NeScalarSels           func(int32, []int32, []int64, []int64) []int64
-	Int32NeNullableScalarSels   func(int32, []int32, *roaring.Bitmap, []int64, []int64) []int64
-	Int64Ne                     func([]int64, []int64, []int64) []int64
-	Int64NeNullable             func([]int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64NeSels                 func([]int64, []int64, []int64, []int64) []int64
-	Int64NeNullableSels         func([]int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Int64NeScalar               func(int64, []int64, []int64) []int64
-	Int64NeNullableScalar       func(int64, []int64, *roaring.Bitmap, []int64) []int64
-	Int64NeScalarSels           func(int64, []int64, []int64, []int64) []int64
-	Int64NeNullableScalarSels   func(int64, []int64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8Ne                     func([]uint8, []uint8, []int64) []int64
-	Uint8NeNullable             func([]uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8NeSels                 func([]uint8, []uint8, []int64, []int64) []int64
-	Uint8NeNullableSels         func([]uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint8NeScalar               func(uint8, []uint8, []int64) []int64
-	Uint8NeNullableScalar       func(uint8, []uint8, *roaring.Bitmap, []int64) []int64
-	Uint8NeScalarSels           func(uint8, []uint8, []int64, []int64) []int64
-	Uint8NeNullableScalarSels   func(uint8, []uint8, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16Ne                    func([]uint16, []uint16, []int64) []int64
-	Uint16NeNullable            func([]uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16NeSels                func([]uint16, []uint16, []int64, []int64) []int64
-	Uint16NeNullableSels        func([]uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint16NeScalar              func(uint16, []uint16, []int64) []int64
-	Uint16NeNullableScalar      func(uint16, []uint16, *roaring.Bitmap, []int64) []int64
-	Uint16NeScalarSels          func(uint16, []uint16, []int64, []int64) []int64
-	Uint16NeNullableScalarSels  func(uint16, []uint16, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32Ne                    func([]uint32, []uint32, []int64) []int64
-	Uint32NeNullable            func([]uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32NeSels                func([]uint32, []uint32, []int64, []int64) []int64
-	Uint32NeNullableSels        func([]uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint32NeScalar              func(uint32, []uint32, []int64) []int64
-	Uint32NeNullableScalar      func(uint32, []uint32, *roaring.Bitmap, []int64) []int64
-	Uint32NeScalarSels          func(uint32, []uint32, []int64, []int64) []int64
-	Uint32NeNullableScalarSels  func(uint32, []uint32, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64Ne                    func([]uint64, []uint64, []int64) []int64
-	Uint64NeNullable            func([]uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64NeSels                func([]uint64, []uint64, []int64, []int64) []int64
-	Uint64NeNullableSels        func([]uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Uint64NeScalar              func(uint64, []uint64, []int64) []int64
-	Uint64NeNullableScalar      func(uint64, []uint64, *roaring.Bitmap, []int64) []int64
-	Uint64NeScalarSels          func(uint64, []uint64, []int64, []int64) []int64
-	Uint64NeNullableScalarSels  func(uint64, []uint64, *roaring.Bitmap, []int64, []int64) []int64
-	Float32Ne                   func([]float32, []float32, []int64) []int64
-	Float32NeNullable           func([]float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32NeSels               func([]float32, []float32, []int64, []int64) []int64
-	Float32NeNullableSels       func([]float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float32NeScalar             func(float32, []float32, []int64) []int64
-	Float32NeNullableScalar     func(float32, []float32, *roaring.Bitmap, []int64) []int64
-	Float32NeScalarSels         func(float32, []float32, []int64, []int64) []int64
-	Float32NeNullableScalarSels func(float32, []float32, *roaring.Bitmap, []int64, []int64) []int64
-	Float64Ne                   func([]float64, []float64, []int64) []int64
-	Float64NeNullable           func([]float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64NeSels               func([]float64, []float64, []int64, []int64) []int64
-	Float64NeNullableSels       func([]float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	Float64NeScalar             func(float64, []float64, []int64) []int64
-	Float64NeNullableScalar     func(float64, []float64, *roaring.Bitmap, []int64) []int64
-	Float64NeScalarSels         func(float64, []float64, []int64, []int64) []int64
-	Float64NeNullableScalarSels func(float64, []float64, *roaring.Bitmap, []int64, []int64) []int64
-	StrNe                       func(*types.Bytes, *types.Bytes, []int64) []int64
-	StrNeNullable               func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrNeSels                   func(*types.Bytes, *types.Bytes, []int64, []int64) []int64
-	StrNeNullableSels           func(*types.Bytes, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-	StrNeScalar                 func([]byte, *types.Bytes, []int64) []int64
-	StrNeNullableScalar         func([]byte, *types.Bytes, *roaring.Bitmap, []int64) []int64
-	StrNeScalarSels             func([]byte, *types.Bytes, []int64, []int64) []int64
-	StrNeNullableScalarSels     func([]byte, *types.Bytes, *roaring.Bitmap, []int64, []int64) []int64
-
-	Decimal64Ne                    func([]types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64NeNullable            func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64NeSels                func([]types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64NeNullableSels        func([]types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal64NeScalar              func(types.Decimal64, []types.Decimal64, int32, int32, []int64) []int64
-	Decimal64NeNullableScalar      func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal64NeScalarSels          func(types.Decimal64, []types.Decimal64, int32, int32, []int64, []int64) []int64
-	Decimal64NeNullableScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128Ne                   func([]types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128NeNullable           func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128NeSels               func([]types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128NeNullableSels       func([]types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-	Decimal128NeScalar             func(types.Decimal128, []types.Decimal128, int32, int32, []int64) []int64
-	Decimal128NeNullableScalar     func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64) []int64
-	Decimal128NeScalarSels         func(types.Decimal128, []types.Decimal128, int32, int32, []int64, []int64) []int64
-	Decimal128NeNullableScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, *roaring.Bitmap, []int64, []int64) []int64
-)
-
-func init() {
-	Int8Ne = int8Ne
-	Int8NeNullable = int8NeNullable
-	Int8NeSels = int8NeSels
-	Int8NeNullableSels = int8NeNullableSels
-	Int8NeScalar = int8NeScalar
-	Int8NeNullableScalar = int8NeNullableScalar
-	Int8NeScalarSels = int8NeScalarSels
-	Int8NeNullableScalarSels = int8NeNullableScalarSels
-	Int16Ne = int16Ne
-	Int16NeNullable = int16NeNullable
-	Int16NeSels = int16NeSels
-	Int16NeNullableSels = int16NeNullableSels
-	Int16NeScalar = int16NeScalar
-	Int16NeNullableScalar = int16NeNullableScalar
-	Int16NeScalarSels = int16NeScalarSels
-	Int16NeNullableScalarSels = int16NeNullableScalarSels
-	Int32Ne = int32Ne
-	Int32NeNullable = int32NeNullable
-	Int32NeSels = int32NeSels
-	Int32NeNullableSels = int32NeNullableSels
-	Int32NeScalar = int32NeScalar
-	Int32NeNullableScalar = int32NeNullableScalar
-	Int32NeScalarSels = int32NeScalarSels
-	Int32NeNullableScalarSels = int32NeNullableScalarSels
-	Int64Ne = int64Ne
-	Int64NeNullable = int64NeNullable
-	Int64NeSels = int64NeSels
-	Int64NeNullableSels = int64NeNullableSels
-	Int64NeScalar = int64NeScalar
-	Int64NeNullableScalar = int64NeNullableScalar
-	Int64NeScalarSels = int64NeScalarSels
-	Int64NeNullableScalarSels = int64NeNullableScalarSels
-	Uint8Ne = uint8Ne
-	Uint8NeNullable = uint8NeNullable
-	Uint8NeSels = uint8NeSels
-	Uint8NeNullableSels = uint8NeNullableSels
-	Uint8NeScalar = uint8NeScalar
-	Uint8NeNullableScalar = uint8NeNullableScalar
-	Uint8NeScalarSels = uint8NeScalarSels
-	Uint8NeNullableScalarSels = uint8NeNullableScalarSels
-	Uint16Ne = uint16Ne
-	Uint16NeNullable = uint16NeNullable
-	Uint16NeSels = uint16NeSels
-	Uint16NeNullableSels = uint16NeNullableSels
-	Uint16NeScalar = uint16NeScalar
-	Uint16NeNullableScalar = uint16NeNullableScalar
-	Uint16NeScalarSels = uint16NeScalarSels
-	Uint16NeNullableScalarSels = uint16NeNullableScalarSels
-	Uint32Ne = uint32Ne
-	Uint32NeNullable = uint32NeNullable
-	Uint32NeSels = uint32NeSels
-	Uint32NeNullableSels = uint32NeNullableSels
-	Uint32NeScalar = uint32NeScalar
-	Uint32NeNullableScalar = uint32NeNullableScalar
-	Uint32NeScalarSels = uint32NeScalarSels
-	Uint32NeNullableScalarSels = uint32NeNullableScalarSels
-	Uint64Ne = uint64Ne
-	Uint64NeNullable = uint64NeNullable
-	Uint64NeSels = uint64NeSels
-	Uint64NeNullableSels = uint64NeNullableSels
-	Uint64NeScalar = uint64NeScalar
-	Uint64NeNullableScalar = uint64NeNullableScalar
-	Uint64NeScalarSels = uint64NeScalarSels
-	Uint64NeNullableScalarSels = uint64NeNullableScalarSels
-	Float32Ne = float32Ne
-	Float32NeNullable = float32NeNullable
-	Float32NeSels = float32NeSels
-	Float32NeNullableSels = float32NeNullableSels
-	Float32NeScalar = float32NeScalar
-	Float32NeNullableScalar = float32NeNullableScalar
-	Float32NeScalarSels = float32NeScalarSels
-	Float32NeNullableScalarSels = float32NeNullableScalarSels
-	Float64Ne = float64Ne
-	Float64NeNullable = float64NeNullable
-	Float64NeSels = float64NeSels
-	Float64NeNullableSels = float64NeNullableSels
-	Float64NeScalar = float64NeScalar
-	Float64NeNullableScalar = float64NeNullableScalar
-	Float64NeScalarSels = float64NeScalarSels
-	Float64NeNullableScalarSels = float64NeNullableScalarSels
-	StrNe = strNe
-	StrNeNullable = strNeNullable
-	StrNeSels = strNeSels
-	StrNeNullableSels = strNeNullableSels
-	StrNeScalar = strNeScalar
-	StrNeNullableScalar = strNeNullableScalar
-	StrNeScalarSels = strNeScalarSels
+	Int8Ne                      = numericNe[int8]
+	Int8NeNullable              = numericNeNullable[int8]
+	Int8NeSels                  = numericNeSels[int8]
+	Int8NeNullableSels          = numericNeNullableSels[int8]
+	Int8NeScalar                = numericNeScalar[int8]
+	Int8NeNullableScalar        = numericNeNullableScalar[int8]
+	Int8NeScalarSels            = numericNeScalarSels[int8]
+	Int8NeNullableScalarSels    = numericNeNullableScalarSels[int8]
+	Int16Ne                     = numericNe[int16]
+	Int16NeNullable             = numericNeNullable[int16]
+	Int16NeSels                 = numericNeSels[int16]
+	Int16NeNullableSels         = numericNeNullableSels[int16]
+	Int16NeScalar               = numericNeScalar[int16]
+	Int16NeNullableScalar       = numericNeNullableScalar[int16]
+	Int16NeScalarSels           = numericNeScalarSels[int16]
+	Int16NeNullableScalarSels   = numericNeNullableScalarSels[int16]
+	Int32Ne                     = numericNe[int32]
+	Int32NeNullable             = numericNeNullable[int32]
+	Int32NeSels                 = numericNeSels[int32]
+	Int32NeNullableSels         = numericNeNullableSels[int32]
+	Int32NeScalar               = numericNeScalar[int32]
+	Int32NeNullableScalar       = numericNeNullableScalar[int32]
+	Int32NeScalarSels           = numericNeScalarSels[int32]
+	Int32NeNullableScalarSels   = numericNeNullableScalarSels[int32]
+	Int64Ne                     = numericNe[int64]
+	Int64NeNullable             = numericNeNullable[int64]
+	Int64NeSels                 = numericNeSels[int64]
+	Int64NeNullableSels         = numericNeNullableSels[int64]
+	Int64NeScalar               = numericNeScalar[int64]
+	Int64NeNullableScalar       = numericNeNullableScalar[int64]
+	Int64NeScalarSels           = numericNeScalarSels[int64]
+	Int64NeNullableScalarSels   = numericNeNullableScalarSels[int64]
+	Uint8Ne                     = numericNe[uint8]
+	Uint8NeNullable             = numericNeNullable[uint8]
+	Uint8NeSels                 = numericNeSels[uint8]
+	Uint8NeNullableSels         = numericNeNullableSels[uint8]
+	Uint8NeScalar               = numericNeScalar[uint8]
+	Uint8NeNullableScalar       = numericNeNullableScalar[uint8]
+	Uint8NeScalarSels           = numericNeScalarSels[uint8]
+	Uint8NeNullableScalarSels   = numericNeNullableScalarSels[uint8]
+	Uint16Ne                    = numericNe[uint16]
+	Uint16NeNullable            = numericNeNullable[uint16]
+	Uint16NeSels                = numericNeSels[uint16]
+	Uint16NeNullableSels        = numericNeNullableSels[uint16]
+	Uint16NeScalar              = numericNeScalar[uint16]
+	Uint16NeNullableScalar      = numericNeNullableScalar[uint16]
+	Uint16NeScalarSels          = numericNeScalarSels[uint16]
+	Uint16NeNullableScalarSels  = numericNeNullableScalarSels[uint16]
+	Uint32Ne                    = numericNe[uint32]
+	Uint32NeNullable            = numericNeNullable[uint32]
+	Uint32NeSels                = numericNeSels[uint32]
+	Uint32NeNullableSels        = numericNeNullableSels[uint32]
+	Uint32NeScalar              = numericNeScalar[uint32]
+	Uint32NeNullableScalar      = numericNeNullableScalar[uint32]
+	Uint32NeScalarSels          = numericNeScalarSels[uint32]
+	Uint32NeNullableScalarSels  = numericNeNullableScalarSels[uint32]
+	Uint64Ne                    = numericNe[uint64]
+	Uint64NeNullable            = numericNeNullable[uint64]
+	Uint64NeSels                = numericNeSels[uint64]
+	Uint64NeNullableSels        = numericNeNullableSels[uint64]
+	Uint64NeScalar              = numericNeScalar[uint64]
+	Uint64NeNullableScalar      = numericNeNullableScalar[uint64]
+	Uint64NeScalarSels          = numericNeScalarSels[uint64]
+	Uint64NeNullableScalarSels  = numericNeNullableScalarSels[uint64]
+	Float32Ne                   = numericNe[float32]
+	Float32NeNullable           = numericNeNullable[float32]
+	Float32NeSels               = numericNeSels[float32]
+	Float32NeNullableSels       = numericNeNullableSels[float32]
+	Float32NeScalar             = numericNeScalar[float32]
+	Float32NeNullableScalar     = numericNeNullableScalar[float32]
+	Float32NeScalarSels         = numericNeScalarSels[float32]
+	Float32NeNullableScalarSels = numericNeNullableScalarSels[float32]
+	Float64Ne                   = numericNe[float64]
+	Float64NeNullable           = numericNeNullable[float64]
+	Float64NeSels               = numericNeSels[float64]
+	Float64NeNullableSels       = numericNeNullableSels[float64]
+	Float64NeScalar             = numericNeScalar[float64]
+	Float64NeNullableScalar     = numericNeNullableScalar[float64]
+	Float64NeScalarSels         = numericNeScalarSels[float64]
+	Float64NeNullableScalarSels = numericNeNullableScalarSels[float64]
+
+	StrNe                   = strNe
+	StrNeNullable           = strNeNullable
+	StrNeSels               = strNeSels
+	StrNeNullableSels       = strNeNullableSels
+	StrNeScalar             = strNeScalar
+	StrNeNullableScalar     = strNeNullableScalar
+	StrNeScalarSels         = strNeScalarSels
 	StrNeNullableScalarSels = strNeNullableScalarSels
-	Decimal64Ne = decimal64Ne
-	Decimal64NeNullable = decimal64NeNullable
-	Decimal64NeSels = decimal64NeSels
-	Decimal64NeNullableSels = decimal64NeNullableSels
-	Decimal64NeScalar = decimal64NeScalar
-	Decimal64NeNullableScalar = decimal64NeNullableScalar
-	Decimal64NeScalarSels = decimal64NeScalarSels
-	Decimal64NeNullableScalarSels = decimal64NeNullableScalarSels
-	Decimal128Ne = decimal128Ne
-	Decimal128NeNullable = decimal128NeNullable
-	Decimal128NeSels = decimal128NeSels
-	Decimal128NeNullableSels = decimal128NeNullableSels
-	Decimal128NeScalar = decimal128NeScalar
-	Decimal128NeNullableScalar = decimal128NeNullableScalar
-	Decimal128NeScalarSels = decimal128NeScalarSels
-	Decimal128NeNullableScalarSels = decimal128NeNullableScalarSels
-}
-
-func int8Ne(xs, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8NeNullable(xs, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8NeSels(xs, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8NeNullableSels(xs, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8NeScalar(x int8, ys []int8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8NeNullableScalar(x int8, ys []int8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8NeScalarSels(x int8, ys []int8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int8NeNullableScalarSels(x int8, ys []int8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16Ne(xs, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
 
-func int16NeNullable(xs, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16NeSels(xs, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16NeNullableSels(xs, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16NeScalar(x int16, ys []int16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16NeNullableScalar(x int16, ys []int16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16NeScalarSels(x int16, ys []int16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int16NeNullableScalarSels(x int16, ys []int16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32Ne(xs, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32NeNullable(xs, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32NeSels(xs, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32NeNullableSels(xs, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32NeScalar(x int32, ys []int32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32NeNullableScalar(x int32, ys []int32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32NeScalarSels(x int32, ys []int32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int32NeNullableScalarSels(x int32, ys []int32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64Ne(xs, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64NeNullable(xs, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64NeSels(xs, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64NeNullableSels(xs, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64NeScalar(x int64, ys []int64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64NeNullableScalar(x int64, ys []int64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64NeScalarSels(x int64, ys []int64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func int64NeNullableScalarSels(x int64, ys []int64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8Ne(xs, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8NeNullable(xs, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8NeSels(xs, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8NeNullableSels(xs, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8NeScalar(x uint8, ys []uint8, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8NeNullableScalar(x uint8, ys []uint8, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8NeScalarSels(x uint8, ys []uint8, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint8NeNullableScalarSels(x uint8, ys []uint8, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16Ne(xs, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16NeNullable(xs, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16NeSels(xs, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16NeNullableSels(xs, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16NeScalar(x uint16, ys []uint16, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16NeNullableScalar(x uint16, ys []uint16, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16NeScalarSels(x uint16, ys []uint16, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint16NeNullableScalarSels(x uint16, ys []uint16, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32Ne(xs, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32NeNullable(xs, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32NeSels(xs, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32NeNullableSels(xs, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32NeScalar(x uint32, ys []uint32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32NeNullableScalar(x uint32, ys []uint32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32NeScalarSels(x uint32, ys []uint32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint32NeNullableScalarSels(x uint32, ys []uint32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64Ne(xs, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64NeNullable(xs, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64NeSels(xs, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64NeNullableSels(xs, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64NeScalar(x uint64, ys []uint64, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64NeNullableScalar(x uint64, ys []uint64, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64NeScalarSels(x uint64, ys []uint64, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func uint64NeNullableScalarSels(x uint64, ys []uint64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32Ne(xs, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, x := range xs {
-		if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32NeNullable(xs, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, x := range xs {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != ys[i] {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32NeSels(xs, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32NeNullableSels(xs, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32NeScalar(x float32, ys []float32, rs []int64) []int64 {
-	rsi := 0
-	for i, y := range ys {
-		if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32NeNullableScalar(x float32, ys []float32, nulls *roaring.Bitmap, rs []int64) []int64 {
-	rsi := 0
-	nullsIter := nulls.Iterator()
-	nextNull := 0
-
-	if nullsIter.HasNext() {
-		nextNull = int(nullsIter.Next())
-	} else {
-		nextNull = -1
-	}
-
-	for i, y := range ys {
-		if i == nextNull {
-			if nullsIter.HasNext() {
-				nextNull = int(nullsIter.Next())
-			} else {
-				nextNull = -1
-			}
-		} else if x != y {
-			rs[rsi] = int64(i)
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32NeScalarSels(x float32, ys []float32, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
-
-func float32NeNullableScalarSels(x float32, ys []float32, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
-	rsi := 0
-	for _, sel := range sels {
-		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
-			rs[rsi] = sel
-			rsi++
-		}
-	}
-	return rs[:rsi]
-}
+	Decimal64Ne                    = decimal64Ne
+	Decimal64NeNullable            = decimal64NeNullable
+	Decimal64NeSels                = decimal64NeSels
+	Decimal64NeNullableSels        = decimal64NeNullableSels
+	Decimal64NeScalar              = decimal64NeScalar
+	Decimal64NeNullableScalar      = decimal64NeNullableScalar
+	Decimal64NeScalarSels          = decimal64NeScalarSels
+	Decimal64NeNullableScalarSels  = decimal64NeNullableScalarSels
+	Decimal128Ne                   = decimal128Ne
+	Decimal128NeNullable           = decimal128NeNullable
+	Decimal128NeSels               = decimal128NeSels
+	Decimal128NeNullableSels       = decimal128NeNullableSels
+	Decimal128NeScalar             = decimal128NeScalar
+	Decimal128NeNullableScalar     = decimal128NeNullableScalar
+	Decimal128NeScalarSels         = decimal128NeScalarSels
+	Decimal128NeNullableScalarSels = decimal128NeNullableScalarSels
+)
 
-func float64Ne(xs, ys []float64, rs []int64) []int64 {
+func numericNe[T constraints.Integer | constraints.Float](xs, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, x := range xs {
 		if x != ys[i] {
@@ -1310,7 +144,7 @@ func float64Ne(xs, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64NeNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericNeNullable[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1336,7 +170,7 @@ func float64NeNullable(xs, ys []float64, nulls *roaring.Bitmap, rs []int64) []in
 	return rs[:rsi]
 }
 
-func float64NeSels(xs, ys []float64, rs, sels []int64) []int64 {
+func numericNeSels[T constraints.Integer | constraints.Float](xs, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if xs[sel] != ys[sel] {
@@ -1347,7 +181,7 @@ func float64NeSels(xs, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64NeNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericNeNullableSels[T constraints.Integer | constraints.Float](xs, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && xs[sel] != ys[sel] {
@@ -1358,7 +192,7 @@ func float64NeNullableSels(xs, ys []float64, nulls *roaring.Bitmap, rs, sels []i
 	return rs[:rsi]
 }
 
-func float64NeScalar(x float64, ys []float64, rs []int64) []int64 {
+func numericNeScalar[T constraints.Integer | constraints.Float](x T, ys []T, rs []int64) []int64 {
 	rsi := 0
 	for i, y := range ys {
 		if x != y {
@@ -1369,7 +203,7 @@ func float64NeScalar(x float64, ys []float64, rs []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64NeNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs []int64) []int64 {
+func numericNeNullableScalar[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs []int64) []int64 {
 	rsi := 0
 	nullsIter := nulls.Iterator()
 	nextNull := 0
@@ -1395,7 +229,7 @@ func float64NeNullableScalar(x float64, ys []float64, nulls *roaring.Bitmap, rs
 	return rs[:rsi]
 }
 
-func float64NeScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
+func numericNeScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if x != ys[sel] {
@@ -1406,7 +240,7 @@ func float64NeScalarSels(x float64, ys []float64, rs, sels []int64) []int64 {
 	return rs[:rsi]
 }
 
-func float64NeNullableScalarSels(x float64, ys []float64, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
+func numericNeNullableScalarSels[T constraints.Integer | constraints.Float](x T, ys []T, nulls *roaring.Bitmap, rs, sels []int64) []int64 {
 	rsi := 0
 	for _, sel := range sels {
 		if !nulls.Contains(uint64(sel)) && x != ys[sel] {
diff --git a/pkg/vectorize/neg/neg.go b/pkg/vectorize/neg/neg.go
index d7d10f4cd53fc42826a035b23b5106aa4567fa11..848ccae19fba30180bb9fc0f5933eb085642b6fb 100644
--- a/pkg/vectorize/neg/neg.go
+++ b/pkg/vectorize/neg/neg.go
@@ -14,51 +14,18 @@
 
 package neg
 
+import "golang.org/x/exp/constraints"
+
 var (
-	Int8Neg    func([]int8, []int8) []int8
-	Int16Neg   func([]int16, []int16) []int16
-	Int32Neg   func([]int32, []int32) []int32
-	Int64Neg   func([]int64, []int64) []int64
-	Float32Neg func([]float32, []float32) []float32
-	Float64Neg func([]float64, []float64) []float64
+	Int8Neg    = numericNeg[int8]
+	Int16Neg   = numericNeg[int16]
+	Int32Neg   = numericNeg[int32]
+	Int64Neg   = numericNeg[int64]
+	Float32Neg = numericNeg[float32]
+	Float64Neg = numericNeg[float64]
 )
 
-func int8Neg(xs, rs []int8) []int8 {
-	for i, x := range xs {
-		rs[i] = -x
-	}
-	return rs
-}
-
-func int16Neg(xs, rs []int16) []int16 {
-	for i, x := range xs {
-		rs[i] = -x
-	}
-	return rs
-}
-
-func int32Neg(xs, rs []int32) []int32 {
-	for i, x := range xs {
-		rs[i] = -x
-	}
-	return rs
-}
-
-func int64Neg(xs, rs []int64) []int64 {
-	for i, x := range xs {
-		rs[i] = -x
-	}
-	return rs
-}
-
-func float32Neg(xs, rs []float32) []float32 {
-	for i, x := range xs {
-		rs[i] = -x
-	}
-	return rs
-}
-
-func float64Neg(xs, rs []float64) []float64 {
+func numericNeg[T constraints.Signed | constraints.Float](xs, rs []T) []T {
 	for i, x := range xs {
 		rs[i] = -x
 	}
diff --git a/pkg/vectorize/neg/neg_amd64.go b/pkg/vectorize/neg/neg_amd64.go
index 54e57b3211986f1ab12ca38a634c85d334bbb824..11fead18a4354d44098661779d03ebf73b4e0d0f 100644
--- a/pkg/vectorize/neg/neg_amd64.go
+++ b/pkg/vectorize/neg/neg_amd64.go
@@ -12,9 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:build amd64
-// +build amd64
-
 package neg
 
 import (
@@ -49,13 +46,6 @@ func init() {
 		Int64Neg = int64NegAvx2
 		Float32Neg = float32NegAvx2
 		Float64Neg = float64NegAvx2
-	} else {
-		Int8Neg = int8Neg
-		Int16Neg = int16Neg
-		Int32Neg = int32Neg
-		Int64Neg = int64Neg
-		Float32Neg = float32Neg
-		Float64Neg = float64Neg
 	}
 }
 
diff --git a/pkg/vectorize/neg/neg_arm64.go b/pkg/vectorize/neg/neg_arm64.go
deleted file mode 100644
index 97bd1d6b7376aa56e975adf3c3a37fec595fe975..0000000000000000000000000000000000000000
--- a/pkg/vectorize/neg/neg_arm64.go
+++ /dev/null
@@ -1,27 +0,0 @@
-// Copyright 2021 Matrix Origin
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build arm64
-// +build arm64
-
-package neg
-
-func init() {
-	Int8Neg = int8Neg
-	Int16Neg = int16Neg
-	Int32Neg = int32Neg
-	Int64Neg = int64Neg
-	Float32Neg = float32Neg
-	Float64Neg = float64Neg
-}
diff --git a/pkg/vectorize/neg/avx2.s b/pkg/vectorize/neg/neg_avx2_amd64.s
similarity index 99%
rename from pkg/vectorize/neg/avx2.s
rename to pkg/vectorize/neg/neg_avx2_amd64.s
index de8377d484dcf079cb6316e230b48390fa9425bf..792162c1a917caee70c98a96ecb773f1808e0515 100644
--- a/pkg/vectorize/neg/avx2.s
+++ b/pkg/vectorize/neg/neg_avx2_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/neg/avx512.s b/pkg/vectorize/neg/neg_avx512_amd64.s
similarity index 99%
rename from pkg/vectorize/neg/avx512.s
rename to pkg/vectorize/neg/neg_avx512_amd64.s
index 133af2fa0b9d50e636b09a53401764d6a5ce034f..0df60ba2abf5d56f6d1dd3210c5668d1ed6c5e12 100644
--- a/pkg/vectorize/neg/avx512.s
+++ b/pkg/vectorize/neg/neg_avx512_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/neg/neg_test.go b/pkg/vectorize/neg/neg_test.go
index f0308f5b82d829591202b876692eff73fc3448d5..07fbb69b648230f7fb9bf9cb741250763a88c7d0 100644
--- a/pkg/vectorize/neg/neg_test.go
+++ b/pkg/vectorize/neg/neg_test.go
@@ -39,12 +39,12 @@ func TestF64Sum(t *testing.T) {
 	xs := makeFbuffer(100)
 	rs := make([]float32, 100)
 	fmt.Printf("float neg: %v\n", Float32Neg(xs, rs))
-	fmt.Printf("pure float neg: %v\n", float32Neg(xs, rs))
+	fmt.Printf("pure float neg: %v\n", numericNeg(xs, rs))
 }
 
 func TestI64Sum(t *testing.T) {
 	xs := makeIbuffer(100)
 	rs := make([]int64, 100)
 	fmt.Printf("int neg: %v\n", Int64Neg(xs, rs))
-	fmt.Printf("pure int neg: %v\n", int64Neg(xs, rs))
+	fmt.Printf("pure int neg: %v\n", numericNeg(xs, rs))
 }
diff --git a/pkg/vectorize/not/not.go b/pkg/vectorize/not/not.go
index ac36275bae469561bc6cf62ffa0155f6bee8b015..33bef3ddef921fac25885f90da573c2f52b598f7 100644
--- a/pkg/vectorize/not/not.go
+++ b/pkg/vectorize/not/not.go
@@ -14,132 +14,22 @@
 
 package not
 
+import "golang.org/x/exp/constraints"
+
 var (
-	Int8Not    func([]int8, []int8) []int8
-	Int16Not   func([]int16, []int8) []int8
-	Int32Not   func([]int32, []int8) []int8
-	Int64Not   func([]int64, []int8) []int8
-	Float32Not func([]float32, []int8) []int8
-	Float64Not func([]float64, []int8) []int8
-	Uint8Not   func([]uint8, []int8) []int8
-	Uint16Not  func([]uint16, []int8) []int8
-	Uint32Not  func([]uint32, []int8) []int8
-	Uint64Not  func([]uint64, []int8) []int8
+	Int8Not    = numericNot[int8]
+	Int16Not   = numericNot[int16]
+	Int32Not   = numericNot[int32]
+	Int64Not   = numericNot[int64]
+	Float32Not = numericNot[float32]
+	Float64Not = numericNot[float64]
+	Uint8Not   = numericNot[uint8]
+	Uint16Not  = numericNot[uint16]
+	Uint32Not  = numericNot[uint32]
+	Uint64Not  = numericNot[uint64]
 )
 
-func init() {
-	Int8Not = int8Not
-	Int16Not = int16Not
-	Int32Not = int32Not
-	Int64Not = int64Not
-	Float32Not = float32Not
-	Float64Not = float64Not
-	Uint8Not = uint8Not
-	Uint16Not = uint16Not
-	Uint32Not = uint32Not
-	Uint64Not = uint64Not
-}
-
-func int8Not(xs []int8, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func int16Not(xs []int16, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func int32Not(xs []int32, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func int64Not(xs []int64, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func float32Not(xs []float32, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func float64Not(xs []float64, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func uint8Not(xs []uint8, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func uint16Not(xs []uint16, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func uint32Not(xs []uint32, rs []int8) []int8 {
-	for i, x := range xs {
-		if x == 0 {
-			rs[i] = 1
-		} else {
-			rs[i] = 0
-		}
-	}
-	return rs
-}
-
-func uint64Not(xs []uint64, rs []int8) []int8 {
+func numericNot[T constraints.Integer | constraints.Float](xs []T, rs []int8) []int8 {
 	for i, x := range xs {
 		if x == 0 {
 			rs[i] = 1
diff --git a/pkg/vectorize/or/or_amd64.go b/pkg/vectorize/or/or_amd64.go
index abcdab96ed87fb6336fa1538712f4058a967c6e2..a46ad03315490206b3575e290d8676501683e8b3 100644
--- a/pkg/vectorize/or/or_amd64.go
+++ b/pkg/vectorize/or/or_amd64.go
@@ -12,9 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:build amd64
-// +build amd64
-
 package or
 
 func orX86Asm(x []int64, y []int64, r []int64) int64
diff --git a/pkg/vectorize/or/x86.s b/pkg/vectorize/or/or_amd64.s
similarity index 97%
rename from pkg/vectorize/or/x86.s
rename to pkg/vectorize/or/or_amd64.s
index 32e74bde616a9f79f814c01d804d7c3d06bbc054..e746228a0cbca2e2878166bddbf7ee58721b443c 100644
--- a/pkg/vectorize/or/x86.s
+++ b/pkg/vectorize/or/or_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run x86.go -out x86.s -stubs x86_stubs.go. DO NOT EDIT.
-// +build 386 amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/round/round_amd64.go b/pkg/vectorize/round/round_amd64.go
index 536c6e7c3b14341b5580d85388a3bc7ce5320a9a..0297b0c40e329c40cb964b768071b34ced9caf0e 100644
--- a/pkg/vectorize/round/round_amd64.go
+++ b/pkg/vectorize/round/round_amd64.go
@@ -12,9 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:build amd64
-// +build amd64
-
 package round
 
 import (
diff --git a/pkg/vectorize/shuffle/shuffle.go b/pkg/vectorize/shuffle/shuffle.go
index 844cd154117a31b97085de6f3c617bdbe3a1303e..7a37b58df5bc3d4f261e0de1dc93ff707103402a 100644
--- a/pkg/vectorize/shuffle/shuffle.go
+++ b/pkg/vectorize/shuffle/shuffle.go
@@ -16,227 +16,39 @@ package shuffle
 
 import (
 	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
 )
 
 var (
-	int8Shuffle  func([]int8, []int8, []int64) []int8
-	int16Shuffle func([]int16, []int16, []int64) []int16
-	int32Shuffle func([]int32, []int32, []int64) []int32
-	int64Shuffle func([]int64, []int64, []int64) []int64
+	Int8Shuffle  = fixedLengthShuffle[int8]
+	Int16Shuffle = fixedLengthShuffle[int16]
+	Int32Shuffle = fixedLengthShuffle[int32]
+	Int64Shuffle = fixedLengthShuffle[int64]
 
-	uint8Shuffle  func([]uint8, []uint8, []int64) []uint8
-	uint16Shuffle func([]uint16, []uint16, []int64) []uint16
-	uint32Shuffle func([]uint32, []uint32, []int64) []uint32
-	uint64Shuffle func([]uint64, []uint64, []int64) []uint64
+	Uint8Shuffle  = fixedLengthShuffle[uint8]
+	Uint16Shuffle = fixedLengthShuffle[uint16]
+	Uint32Shuffle = fixedLengthShuffle[uint32]
+	Uint64Shuffle = fixedLengthShuffle[uint64]
 
-	float32Shuffle func([]float32, []float32, []int64) []float32
-	float64Shuffle func([]float64, []float64, []int64) []float64
+	Float32Shuffle = fixedLengthShuffle[float32]
+	Float64Shuffle = fixedLengthShuffle[float64]
 
-	dateShuffle     func([]types.Date, []types.Date, []int64) []types.Date
-	datetimeShuffle func([]types.Datetime, []types.Datetime, []int64) []types.Datetime
+	Decimal64Shuffle  = fixedLengthShuffle[types.Decimal64]
+	Decimal128Shuffle = fixedLengthShuffle[types.Decimal128]
 
-	decimal64Shuffle  func([]types.Decimal64, []types.Decimal64, []int64) []types.Decimal64
-	decimal128Shuffle func([]types.Decimal128, []types.Decimal128, []int64) []types.Decimal128
+	DateShuffle     = fixedLengthShuffle[types.Date]
+	DatetimeShuffle = fixedLengthShuffle[types.Datetime]
 
-	tupleShuffle func([][]interface{}, [][]interface{}, []int64) [][]interface{}
+	TupleShuffle = tupleShuffle
 
-	strShuffle func(*types.Bytes, []uint32, []uint32, []int64) *types.Bytes
+	StrShuffle = strShuffle
 )
 
-func init() {
-	int8Shuffle = int8ShufflePure
-	int16Shuffle = int16ShufflePure
-	int32Shuffle = int32ShufflePure
-	int64Shuffle = int64ShufflePure
-
-	uint8Shuffle = uint8ShufflePure
-	uint16Shuffle = uint16ShufflePure
-	uint32Shuffle = uint32ShufflePure
-	uint64Shuffle = uint64ShufflePure
-
-	float32Shuffle = float32ShufflePure
-	float64Shuffle = float64ShufflePure
-
-	decimal64Shuffle = decimal64ShufflePure
-	decimal128Shuffle = decimal128ShufflePure
-
-	dateShuffle = dateShufflePure
-	datetimeShuffle = datetimeShufflePure
-
-	tupleShuffle = tupleShufflePure
-
-	strShuffle = strShufflePure
-}
-
-func Int8Shuffle(vs, ws []int8, sels []int64) []int8 {
-	return int8Shuffle(vs, ws, sels)
-}
-
-func Int16Shuffle(vs, ws []int16, sels []int64) []int16 {
-	return int16Shuffle(vs, ws, sels)
-}
-
-func Int32Shuffle(vs, ws []int32, sels []int64) []int32 {
-	return int32Shuffle(vs, ws, sels)
-}
-
-func Int64Shuffle(vs, ws []int64, sels []int64) []int64 {
-	return int64Shuffle(vs, ws, sels)
-}
-
-func Uint8Shuffle(vs, ws []uint8, sels []int64) []uint8 {
-	return uint8Shuffle(vs, ws, sels)
-}
-
-func Uint16Shuffle(vs, ws []uint16, sels []int64) []uint16 {
-	return uint16Shuffle(vs, ws, sels)
-}
-
-func Uint32Shuffle(vs, ws []uint32, sels []int64) []uint32 {
-	return uint32Shuffle(vs, ws, sels)
-}
-
-func Uint64Shuffle(vs, ws []uint64, sels []int64) []uint64 {
-	return uint64Shuffle(vs, ws, sels)
-}
-
-func Float32Shuffle(vs, ws []float32, sels []int64) []float32 {
-	return float32Shuffle(vs, ws, sels)
-}
-
-func Float64Shuffle(vs, ws []float64, sels []int64) []float64 {
-	return float64Shuffle(vs, ws, sels)
-}
-
-func Decimal64Shuffle(vs, ws []types.Decimal64, sels []int64) []types.Decimal64 {
-	return decimal64Shuffle(vs, ws, sels)
-}
-
-func Decimal128Shuffle(vs, ws []types.Decimal128, sels []int64) []types.Decimal128 {
-	return decimal128Shuffle(vs, ws, sels)
-}
-
-func DateShuffle(vs []types.Date, ws []types.Date, sels []int64) []types.Date {
-	return dateShuffle(vs, ws, sels)
-}
-
-func DatetimeShuffle(vs []types.Datetime, ws []types.Datetime, sels []int64) []types.Datetime {
-	return datetimeShuffle(vs, ws, sels)
-}
-
-func TupleShuffle(vs, ws [][]interface{}, sels []int64) [][]interface{} {
-	return tupleShuffle(vs, ws, sels)
-}
-
-func StrShuffle(vs *types.Bytes, os, ns []uint32, sels []int64) *types.Bytes {
-	return strShuffle(vs, os, ns, sels)
-}
-
-func int8ShufflePure(vs, ws []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func int16ShufflePure(vs, ws []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func int32ShufflePure(vs, ws []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func int64ShufflePure(vs, ws []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func uint8ShufflePure(vs, ws []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func uint16ShufflePure(vs, ws []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func uint32ShufflePure(vs, ws []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func uint64ShufflePure(vs, ws []uint64, sels []int64) []uint64 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func float32ShufflePure(vs, ws []float32, sels []int64) []float32 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func float64ShufflePure(vs, ws []float64, sels []int64) []float64 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func decimal64ShufflePure(vs []types.Decimal64, ws []types.Decimal64, sels []int64) []types.Decimal64 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func decimal128ShufflePure(vs []types.Decimal128, ws []types.Decimal128, sels []int64) []types.Decimal128 {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
-}
-
-func dateShufflePure(vs []types.Date, ws []types.Date, sels []int64) []types.Date {
-	for i, sel := range sels {
-		ws[i] = vs[sel]
-	}
-	copy(vs, ws)
-	return vs[:len(sels)]
+type fixedLength interface {
+	constraints.Integer | constraints.Float | types.Decimal128
 }
 
-func datetimeShufflePure(vs []types.Datetime, ws []types.Datetime, sels []int64) []types.Datetime {
+func fixedLengthShuffle[T fixedLength](vs, ws []T, sels []int64) []T {
 	for i, sel := range sels {
 		ws[i] = vs[sel]
 	}
@@ -244,14 +56,14 @@ func datetimeShufflePure(vs []types.Datetime, ws []types.Datetime, sels []int64)
 	return vs[:len(sels)]
 }
 
-func tupleShufflePure(vs, ws [][]interface{}, sels []int64) [][]interface{} {
+func tupleShuffle(vs, ws [][]interface{}, sels []int64) [][]interface{} {
 	for i, sel := range sels {
 		ws[i] = vs[sel]
 	}
 	return ws[:len(sels)]
 }
 
-func strShufflePure(vs *types.Bytes, os, ns []uint32, sels []int64) *types.Bytes {
+func strShuffle(vs *types.Bytes, os, ns []uint32, sels []int64) *types.Bytes {
 	for i, sel := range sels {
 		os[i] = vs.Offsets[sel]
 		ns[i] = vs.Lengths[sel]
diff --git a/pkg/vectorize/sub/sub.go b/pkg/vectorize/sub/sub.go
index 72306510b67c0761b5bfc389a4808b687a1d43df..85681381736cf9af7d83fd7e0a778db5ada5126b 100644
--- a/pkg/vectorize/sub/sub.go
+++ b/pkg/vectorize/sub/sub.go
@@ -15,729 +15,242 @@
 package sub
 
 import (
-	"github.com/matrixorigin/matrixone/pkg/container/types"
 	"math"
-)
 
-var (
-	Int8Sub                func([]int8, []int8, []int8) []int8
-	Int8SubSels            func([]int8, []int8, []int8, []int64) []int8
-	Int8SubScalar          func(int8, []int8, []int8) []int8
-	Int8SubScalarSels      func(int8, []int8, []int8, []int64) []int8
-	Int8SubByScalar        func(int8, []int8, []int8) []int8
-	Int8SubByScalarSels    func(int8, []int8, []int8, []int64) []int8
-	Int16Sub               func([]int16, []int16, []int16) []int16
-	Int16SubSels           func([]int16, []int16, []int16, []int64) []int16
-	Int16SubScalar         func(int16, []int16, []int16) []int16
-	Int16SubScalarSels     func(int16, []int16, []int16, []int64) []int16
-	Int16SubByScalar       func(int16, []int16, []int16) []int16
-	Int16SubByScalarSels   func(int16, []int16, []int16, []int64) []int16
-	Int32Sub               func([]int32, []int32, []int32) []int32
-	Int32SubSels           func([]int32, []int32, []int32, []int64) []int32
-	Int32SubScalar         func(int32, []int32, []int32) []int32
-	Int32SubScalarSels     func(int32, []int32, []int32, []int64) []int32
-	Int32SubByScalar       func(int32, []int32, []int32) []int32
-	Int32SubByScalarSels   func(int32, []int32, []int32, []int64) []int32
-	Int64Sub               func([]int64, []int64, []int64) []int64
-	Int64SubSels           func([]int64, []int64, []int64, []int64) []int64
-	Int64SubScalar         func(int64, []int64, []int64) []int64
-	Int64SubScalarSels     func(int64, []int64, []int64, []int64) []int64
-	Int64SubByScalar       func(int64, []int64, []int64) []int64
-	Int64SubByScalarSels   func(int64, []int64, []int64, []int64) []int64
-	Uint8Sub               func([]uint8, []uint8, []uint8) []uint8
-	Uint8SubSels           func([]uint8, []uint8, []uint8, []int64) []uint8
-	Uint8SubScalar         func(uint8, []uint8, []uint8) []uint8
-	Uint8SubScalarSels     func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint8SubByScalar       func(uint8, []uint8, []uint8) []uint8
-	Uint8SubByScalarSels   func(uint8, []uint8, []uint8, []int64) []uint8
-	Uint16Sub              func([]uint16, []uint16, []uint16) []uint16
-	Uint16SubSels          func([]uint16, []uint16, []uint16, []int64) []uint16
-	Uint16SubScalar        func(uint16, []uint16, []uint16) []uint16
-	Uint16SubScalarSels    func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint16SubByScalar      func(uint16, []uint16, []uint16) []uint16
-	Uint16SubByScalarSels  func(uint16, []uint16, []uint16, []int64) []uint16
-	Uint32Sub              func([]uint32, []uint32, []uint32) []uint32
-	Uint32SubSels          func([]uint32, []uint32, []uint32, []int64) []uint32
-	Uint32SubScalar        func(uint32, []uint32, []uint32) []uint32
-	Uint32SubScalarSels    func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint32SubByScalar      func(uint32, []uint32, []uint32) []uint32
-	Uint32SubByScalarSels  func(uint32, []uint32, []uint32, []int64) []uint32
-	Uint64Sub              func([]uint64, []uint64, []uint64) []uint64
-	Uint64SubSels          func([]uint64, []uint64, []uint64, []int64) []uint64
-	Uint64SubScalar        func(uint64, []uint64, []uint64) []uint64
-	Uint64SubScalarSels    func(uint64, []uint64, []uint64, []int64) []uint64
-	Uint64SubByScalar      func(uint64, []uint64, []uint64) []uint64
-	Uint64SubByScalarSels  func(uint64, []uint64, []uint64, []int64) []uint64
-	Float32Sub             func([]float32, []float32, []float32) []float32
-	Float32SubSels         func([]float32, []float32, []float32, []int64) []float32
-	Float32SubScalar       func(float32, []float32, []float32) []float32
-	Float32SubScalarSels   func(float32, []float32, []float32, []int64) []float32
-	Float32SubByScalar     func(float32, []float32, []float32) []float32
-	Float32SubByScalarSels func(float32, []float32, []float32, []int64) []float32
-	Float64Sub             func([]float64, []float64, []float64) []float64
-	Float64SubSels         func([]float64, []float64, []float64, []int64) []float64
-	Float64SubScalar       func(float64, []float64, []float64) []float64
-	Float64SubScalarSels   func(float64, []float64, []float64, []int64) []float64
-	Float64SubByScalar     func(float64, []float64, []float64) []float64
-	Float64SubByScalarSels func(float64, []float64, []float64, []int64) []float64
-
-	Decimal64Sub              func([]types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64) []types.Decimal64
-	Decimal64SubSels          func([]types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64, []int64) []types.Decimal64
-	Decimal64SubScalar        func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64) []types.Decimal64
-	Decimal64SubScalarSels    func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64, []int64) []types.Decimal64
-	Decimal64SubByScalar      func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64) []types.Decimal64
-	Decimal64SubByScalarSels  func(types.Decimal64, []types.Decimal64, int32, int32, []types.Decimal64, []int64) []types.Decimal64
-	Decimal128Sub             func([]types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128SubSels         func([]types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128SubScalar       func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128SubScalarSels   func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-	Decimal128SubByScalar     func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128) []types.Decimal128
-	Decimal128SubByScalarSels func(types.Decimal128, []types.Decimal128, int32, int32, []types.Decimal128, []int64) []types.Decimal128
-
-	Int32Int64Sub         func([]int64, []int32, []int64) []int64
-	Int32Int64SubSels     func([]int64, []int32, []int64, []int64) []int64
-	Int16Int64Sub         func([]int64, []int16, []int64) []int64
-	Int16Int64SubSels     func([]int64, []int16, []int64, []int64) []int64
-	Int8Int64Sub          func([]int64, []int8, []int64) []int64
-	Int8Int64SubSels      func([]int64, []int8, []int64, []int64) []int64
-	Int16Int32Sub         func([]int32, []int16, []int32) []int32
-	Int16Int32SubSels     func([]int32, []int16, []int32, []int64) []int32
-	Int8Int32Sub          func([]int32, []int8, []int32) []int32
-	Int8Int32SubSels      func([]int32, []int8, []int32, []int64) []int32
-	Int8Int16Sub          func([]int16, []int8, []int16) []int16
-	Int8Int16SubSels      func([]int16, []int8, []int16, []int64) []int16
-	Float32Float64Sub     func([]float64, []float32, []float64) []float64
-	Float32Float64SubSels func([]float64, []float32, []float64, []int64) []float64
-	Uint32Uint64Sub       func([]uint64, []uint32, []uint64) []uint64
-	Uint32Uint64SubSels   func([]uint64, []uint32, []uint64, []int64) []uint64
-	Uint16Uint64Sub       func([]uint64, []uint16, []uint64) []uint64
-	Uint16Uint64SubSels   func([]uint64, []uint16, []uint64, []int64) []uint64
-	Uint8Uint64Sub        func([]uint64, []uint8, []uint64) []uint64
-	Uint8Uint64SubSels    func([]uint64, []uint8, []uint64, []int64) []uint64
-	Uint16Uint32Sub       func([]uint32, []uint16, []uint32) []uint32
-	Uint16Uint32SubSels   func([]uint32, []uint16, []uint32, []int64) []uint32
-	Uint8Uint32Sub        func([]uint32, []uint8, []uint32) []uint32
-	Uint8Uint32SubSels    func([]uint32, []uint8, []uint32, []int64) []uint32
-	Uint8Uint16Sub        func([]uint16, []uint8, []uint16) []uint16
-	Uint8Uint16SubSels    func([]uint16, []uint8, []uint16, []int64) []uint16
+	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
 )
 
-func init() {
-	Decimal64Sub = decimal64Sub
-	Decimal64SubSels = decimal64SubSels
-	Decimal64SubScalar = decimal64SubScalar
-	Decimal64SubScalarSels = decimal64SubScalarSels
-	Decimal64SubByScalar = decimal64SubByScalar
-	Decimal64SubByScalarSels = decimal64SubByScalarSels
-	Decimal128Sub = decimal128Sub
-	Decimal128SubSels = decimal128SubSels
-	Decimal128SubScalar = decimal128SubScalar
-	Decimal128SubScalarSels = decimal128SubScalarSels
-	Decimal128SubByScalar = decimal128SubByScalar
+var (
+	Int8Sub                = numericSub[int8]
+	Int8SubScalar          = numericSubScalar[int8]
+	Int8SubByScalar        = numericSubByScalar[int8]
+	Int16Sub               = numericSub[int16]
+	Int16SubScalar         = numericSubScalar[int16]
+	Int16SubByScalar       = numericSubByScalar[int16]
+	Int32Sub               = numericSub[int32]
+	Int32SubScalar         = numericSubScalar[int32]
+	Int32SubByScalar       = numericSubByScalar[int32]
+	Int64Sub               = numericSub[int64]
+	Int64SubScalar         = numericSubScalar[int64]
+	Int64SubByScalar       = numericSubByScalar[int64]
+	Uint8Sub               = numericSub[uint8]
+	Uint8SubScalar         = numericSubScalar[uint8]
+	Uint8SubByScalar       = numericSubByScalar[uint8]
+	Uint16Sub              = numericSub[uint16]
+	Uint16SubScalar        = numericSubScalar[uint16]
+	Uint16SubByScalar      = numericSubByScalar[uint16]
+	Uint32Sub              = numericSub[uint32]
+	Uint32SubScalar        = numericSubScalar[uint32]
+	Uint32SubByScalar      = numericSubByScalar[uint32]
+	Uint64Sub              = numericSub[uint64]
+	Uint64SubScalar        = numericSubScalar[uint64]
+	Uint64SubByScalar      = numericSubByScalar[uint64]
+	Float32Sub             = numericSub[float32]
+	Float32SubScalar       = numericSubScalar[float32]
+	Float32SubByScalar     = numericSubByScalar[float32]
+	Float64Sub             = numericSub[float64]
+	Float64SubScalar       = numericSubScalar[float64]
+	Float64SubByScalar     = numericSubByScalar[float64]
+	Int8SubSels            = numericSubSels[int8]
+	Int8SubScalarSels      = numericSubScalarSels[int8]
+	Int8SubByScalarSels    = numericSubByScalarSels[int8]
+	Int16SubSels           = numericSubSels[int16]
+	Int16SubScalarSels     = numericSubScalarSels[int16]
+	Int16SubByScalarSels   = numericSubByScalarSels[int16]
+	Int32SubSels           = numericSubSels[int32]
+	Int32SubScalarSels     = numericSubScalarSels[int32]
+	Int32SubByScalarSels   = numericSubByScalarSels[int32]
+	Int64SubSels           = numericSubSels[int64]
+	Int64SubScalarSels     = numericSubScalarSels[int64]
+	Int64SubByScalarSels   = numericSubByScalarSels[int64]
+	Uint8SubSels           = numericSubSels[uint8]
+	Uint8SubScalarSels     = numericSubScalarSels[uint8]
+	Uint8SubByScalarSels   = numericSubByScalarSels[uint8]
+	Uint16SubSels          = numericSubSels[uint16]
+	Uint16SubScalarSels    = numericSubScalarSels[uint16]
+	Uint16SubByScalarSels  = numericSubByScalarSels[uint16]
+	Uint32SubSels          = numericSubSels[uint32]
+	Uint32SubScalarSels    = numericSubScalarSels[uint32]
+	Uint32SubByScalarSels  = numericSubByScalarSels[uint32]
+	Uint64SubSels          = numericSubSels[uint64]
+	Uint64SubScalarSels    = numericSubScalarSels[uint64]
+	Uint64SubByScalarSels  = numericSubByScalarSels[uint64]
+	Float32SubSels         = numericSubSels[float32]
+	Float32SubScalarSels   = numericSubScalarSels[float32]
+	Float32SubByScalarSels = numericSubByScalarSels[float32]
+	Float64SubSels         = numericSubSels[float64]
+	Float64SubScalarSels   = numericSubScalarSels[float64]
+	Float64SubByScalarSels = numericSubByScalarSels[float64]
+
+	Decimal64Sub              = decimal64Sub
+	Decimal64SubSels          = decimal64SubSels
+	Decimal64SubScalar        = decimal64SubScalar
+	Decimal64SubScalarSels    = decimal64SubScalarSels
+	Decimal64SubByScalar      = decimal64SubByScalar
+	Decimal64SubByScalarSels  = decimal64SubByScalarSels
+	Decimal128Sub             = decimal128Sub
+	Decimal128SubSels         = decimal128SubSels
+	Decimal128SubScalar       = decimal128SubScalar
+	Decimal128SubScalarSels   = decimal128SubScalarSels
+	Decimal128SubByScalar     = decimal128SubByScalar
 	Decimal128SubByScalarSels = decimal128SubByScalarSels
-}
-
-func int8Sub(xs, ys, rs []int8) []int8 {
-	for i, x := range xs {
-		rs[i] = x - ys[i]
-	}
-	return rs
-}
-
-func int8SubSels(xs, ys, rs []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
-	}
-	return rs
-}
-
-func int8SubScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = x - y
-	}
-	return rs
-}
-
-func int8SubScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		rs[i] = x - ys[sel]
-	}
-	return rs
-}
-
-func int8SubByScalar(x int8, ys, rs []int8) []int8 {
-	for i, y := range ys {
-		rs[i] = y - x
-	}
-	return rs
-}
-
-func int8SubByScalarSels(x int8, ys, rs []int8, sels []int64) []int8 {
-	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
-
-func int16Sub(xs, ys, rs []int16) []int16 {
-	for i, x := range xs {
-		rs[i] = x - ys[i]
-	}
-	return rs
-}
-
-func int16SubSels(xs, ys, rs []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
-	}
-	return rs
-}
-
-func int16SubScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = x - y
-	}
-	return rs
-}
-
-func int16SubScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		rs[i] = x - ys[sel]
-	}
-	return rs
-}
-
-func int16SubByScalar(x int16, ys, rs []int16) []int16 {
-	for i, y := range ys {
-		rs[i] = y - x
-	}
-	return rs
-}
-
-func int16SubByScalarSels(x int16, ys, rs []int16, sels []int64) []int16 {
-	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
-
-func int32Sub(xs, ys, rs []int32) []int32 {
-	for i, x := range xs {
-		rs[i] = x - ys[i]
-	}
-	return rs
-}
-
-func int32SubSels(xs, ys, rs []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
-	}
-	return rs
-}
-
-func int32SubScalar(x int32, ys, rs []int32) []int32 {
-	for i, y := range ys {
-		rs[i] = x - y
-	}
-	return rs
-}
-
-func int32SubScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		rs[i] = x - ys[sel]
-	}
-	return rs
-}
-
-func int32SubByScalar(x int32, ys, rs []int32) []int32 {
-	for i, y := range ys {
-		rs[i] = y - x
-	}
-	return rs
-}
-
-func int32SubByScalarSels(x int32, ys, rs []int32, sels []int64) []int32 {
-	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
-
-func int64Sub(xs, ys, rs []int64) []int64 {
-	for i, x := range xs {
-		rs[i] = x - ys[i]
-	}
-	return rs
-}
-
-func int64SubSels(xs, ys, rs []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
-	}
-	return rs
-}
-
-func int64SubScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = x - y
-	}
-	return rs
-}
-
-func int64SubScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		rs[i] = x - ys[sel]
-	}
-	return rs
-}
-
-func int64SubByScalar(x int64, ys, rs []int64) []int64 {
-	for i, y := range ys {
-		rs[i] = y - x
-	}
-	return rs
-}
-
-func int64SubByScalarSels(x int64, ys, rs []int64, sels []int64) []int64 {
-	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
-
-func uint8Sub(xs, ys, rs []uint8) []uint8 {
-	for i, x := range xs {
-		rs[i] = x - ys[i]
-	}
-	return rs
-}
-
-func uint8SubSels(xs, ys, rs []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
-	}
-	return rs
-}
-
-func uint8SubScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = x - y
-	}
-	return rs
-}
-
-func uint8SubScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		rs[i] = x - ys[sel]
-	}
-	return rs
-}
-
-func uint8SubByScalar(x uint8, ys, rs []uint8) []uint8 {
-	for i, y := range ys {
-		rs[i] = y - x
-	}
-	return rs
-}
-
-func uint8SubByScalarSels(x uint8, ys, rs []uint8, sels []int64) []uint8 {
-	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
-
-func uint16Sub(xs, ys, rs []uint16) []uint16 {
-	for i, x := range xs {
-		rs[i] = x - ys[i]
-	}
-	return rs
-}
-
-func uint16SubSels(xs, ys, rs []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
-	}
-	return rs
-}
-
-func uint16SubScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = x - y
-	}
-	return rs
-}
-
-func uint16SubScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		rs[i] = x - ys[sel]
-	}
-	return rs
-}
-
-func uint16SubByScalar(x uint16, ys, rs []uint16) []uint16 {
-	for i, y := range ys {
-		rs[i] = y - x
-	}
-	return rs
-}
-
-func uint16SubByScalarSels(x uint16, ys, rs []uint16, sels []int64) []uint16 {
-	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
-
-func uint32Sub(xs, ys, rs []uint32) []uint32 {
-	for i, x := range xs {
-		rs[i] = x - ys[i]
-	}
-	return rs
-}
-
-func uint32SubSels(xs, ys, rs []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
-	}
-	return rs
-}
-
-func uint32SubScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = x - y
-	}
-	return rs
-}
-
-func uint32SubScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		rs[i] = x - ys[sel]
-	}
-	return rs
-}
-
-func uint32SubByScalar(x uint32, ys, rs []uint32) []uint32 {
-	for i, y := range ys {
-		rs[i] = y - x
-	}
-	return rs
-}
 
-func uint32SubByScalarSels(x uint32, ys, rs []uint32, sels []int64) []uint32 {
-	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
+	Int32Int64Sub         = numericSubBigSmall[int64, int32]
+	Int32Int64SubSels     = numericSubSelsBigSmall[int64, int32]
+	Int16Int64Sub         = numericSubBigSmall[int64, int16]
+	Int16Int64SubSels     = numericSubSelsBigSmall[int64, int16]
+	Int8Int64Sub          = numericSubBigSmall[int64, int8]
+	Int8Int64SubSels      = numericSubSelsBigSmall[int64, int8]
+	Int16Int32Sub         = numericSubBigSmall[int32, int16]
+	Int16Int32SubSels     = numericSubSelsBigSmall[int32, int16]
+	Int8Int32Sub          = numericSubBigSmall[int32, int8]
+	Int8Int32SubSels      = numericSubSelsBigSmall[int32, int8]
+	Int8Int16Sub          = numericSubBigSmall[int16, int8]
+	Int8Int16SubSels      = numericSubSelsBigSmall[int16, int8]
+	Uint32Uint64Sub       = numericSubBigSmall[uint64, uint32]
+	Uint32Uint64SubSels   = numericSubSelsBigSmall[uint64, uint32]
+	Uint16Uint64Sub       = numericSubBigSmall[uint64, uint16]
+	Uint16Uint64SubSels   = numericSubSelsBigSmall[uint64, uint16]
+	Uint8Uint64Sub        = numericSubBigSmall[uint64, uint8]
+	Uint8Uint64SubSels    = numericSubSelsBigSmall[uint64, uint8]
+	Uint16Uint32Sub       = numericSubBigSmall[uint32, uint16]
+	Uint16Uint32SubSels   = numericSubSelsBigSmall[uint32, uint16]
+	Uint8Uint32Sub        = numericSubBigSmall[uint32, uint8]
+	Uint8Uint32SubSels    = numericSubSelsBigSmall[uint32, uint8]
+	Uint8Uint16Sub        = numericSubBigSmall[uint16, uint8]
+	Uint8Uint16SubSels    = numericSubSelsBigSmall[uint16, uint8]
+	Float32Float64Sub     = numericSubBigSmall[float64, float32]
+	Float32Float64SubSels = numericSubSelsBigSmall[float64, float32]
+)
 
-func uint64Sub(xs, ys, rs []uint64) []uint64 {
+func numericSub[T constraints.Integer | constraints.Float](xs, ys, rs []T) []T {
 	for i, x := range xs {
 		rs[i] = x - ys[i]
 	}
 	return rs
 }
 
-func uint64SubSels(xs, ys, rs []uint64, sels []int64) []uint64 {
+func numericSubSels[T constraints.Integer | constraints.Float](xs, ys, rs []T, sels []int64) []T {
 	for i, sel := range sels {
 		rs[i] = xs[sel] - ys[sel]
 	}
 	return rs
 }
 
-func uint64SubScalar(x uint64, ys, rs []uint64) []uint64 {
+func numericSubScalar[T constraints.Integer | constraints.Float](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = x - y
 	}
 	return rs
 }
 
-func uint64SubScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
+func numericSubScalarSels[T constraints.Integer | constraints.Float](x T, ys, rs []T, sels []int64) []T {
 	for i, sel := range sels {
 		rs[i] = x - ys[sel]
 	}
 	return rs
 }
 
-func uint64SubByScalar(x uint64, ys, rs []uint64) []uint64 {
+func numericSubByScalar[T constraints.Integer | constraints.Float](x T, ys, rs []T) []T {
 	for i, y := range ys {
 		rs[i] = y - x
 	}
 	return rs
 }
 
-func uint64SubByScalarSels(x uint64, ys, rs []uint64, sels []int64) []uint64 {
+func numericSubByScalarSels[T constraints.Integer | constraints.Float](x T, ys, rs []T, sels []int64) []T {
 	for i, sel := range sels {
 		rs[i] = ys[sel] - x
 	}
 	return rs
 }
 
-func float32Sub(xs, ys, rs []float32) []float32 {
+func numericSubBigSmall[TBig, TSmall constraints.Integer | constraints.Float](xs []TBig, ys []TSmall, rs []TBig) []TBig {
 	for i, x := range xs {
-		rs[i] = x - ys[i]
+		rs[i] = x - TBig(ys[i])
 	}
 	return rs
 }
 
-func float32SubSels(xs, ys, rs []float32, sels []int64) []float32 {
+func numericSubSelsBigSmall[TBig, TSmall constraints.Integer | constraints.Float](xs []TBig, ys []TSmall, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
+		rs[i] = xs[sel] - TBig(ys[sel])
 	}
 	return rs
 }
 
-func float32SubScalar(x float32, ys, rs []float32) []float32 {
+/*
+func numericSubScalarBigSmall[TBig, TSmall constraints.Integer | constraints.Float](x TBig, ys []TSmall, rs []TBig) []TBig {
 	for i, y := range ys {
-		rs[i] = x - y
+		rs[i] = x - TBig(y)
 	}
 	return rs
 }
 
-func float32SubScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
+func numericSubScalarSelsBigSmall[TBig, TSmall constraints.Integer | constraints.Float](x TBig, ys []TSmall, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = x - ys[sel]
+		rs[i] = x - TBig(ys[sel])
 	}
 	return rs
 }
 
-func float32SubByScalar(x float32, ys, rs []float32) []float32 {
+func numericSubByScalarBigSmall[TBig, TSmall constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig) []TBig {
 	for i, y := range ys {
-		rs[i] = y - x
+		rs[i] = y - TBig(x)
 	}
 	return rs
 }
 
-func float32SubByScalarSels(x float32, ys, rs []float32, sels []int64) []float32 {
+func numericSubByScalarSelsBigSmall[TBig, TSmall constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = ys[sel] - x
+		rs[i] = ys[sel] - TBig(x)
 	}
 	return rs
 }
 
-func float64Sub(xs, ys, rs []float64) []float64 {
+func numericSubSmallBig[TSmall, TBig constraints.Integer | constraints.Float](xs []TSmall, ys, rs []TBig) []TBig {
 	for i, x := range xs {
-		rs[i] = x - ys[i]
+		rs[i] = TBig(x) - ys[i]
 	}
 	return rs
 }
 
-func float64SubSels(xs, ys, rs []float64, sels []int64) []float64 {
+func numericSubSelsSmallBig[TSmall, TBig constraints.Integer | constraints.Float](xs []TSmall, ys, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = xs[sel] - ys[sel]
+		rs[i] = TBig(xs[sel]) - ys[sel]
 	}
 	return rs
 }
 
-func float64SubScalar(x float64, ys, rs []float64) []float64 {
+func numericSubScalarSmallBig[TSmall, TBig constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig) []TBig {
 	for i, y := range ys {
-		rs[i] = x - y
+		rs[i] = TBig(x) - y
 	}
 	return rs
 }
 
-func float64SubScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func numericSubScalarSelsSmallBig[TSmall, TBig constraints.Integer | constraints.Float](x TSmall, ys, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = x - ys[sel]
+		rs[i] = TBig(x) - ys[sel]
 	}
 	return rs
 }
 
-func float64SubByScalar(x float64, ys, rs []float64) []float64 {
+func numericSubByScalarSmallBig[TSmall, TBig constraints.Integer | constraints.Float](x TBig, ys []TSmall, rs []TBig) []TBig {
 	for i, y := range ys {
-		rs[i] = y - x
+		rs[i] = TBig(y) - x
 	}
 	return rs
 }
 
-func float64SubByScalarSels(x float64, ys, rs []float64, sels []int64) []float64 {
+func numericSubByScalarSelsSmallBig[TSmall, TBig constraints.Integer | constraints.Float](x TBig, ys []TSmall, rs []TBig, sels []int64) []TBig {
 	for i, sel := range sels {
-		rs[i] = ys[sel] - x
-	}
-	return rs
-}
-
-func int32Int64Sub(xs []int64, ys []int32, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = xs[i] - int64(ys[i])
-	}
-	return rs
-}
-
-func int32Int64SubSels(xs []int64, ys []int32, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - int64(ys[sel])
-	}
-	return rs
-}
-
-func int16Int64Sub(xs []int64, ys []int16, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = xs[i] - int64(ys[i])
-	}
-	return rs
-}
-
-func int16Int64SubSels(xs []int64, ys []int16, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - int64(ys[sel])
-	}
-	return rs
-}
-
-func int8Int64Sub(xs []int64, ys []int8, rs []int64) []int64 {
-	for i := range rs {
-		rs[i] = xs[i] - int64(ys[i])
-	}
-	return rs
-}
-
-func int8Int64SubSels(xs []int64, ys []int8, rs []int64, sels []int64) []int64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - int64(ys[sel])
-	}
-	return rs
-}
-
-func int16Int32Sub(xs []int32, ys []int16, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = xs[i] - int32(ys[i])
-	}
-	return rs
-}
-
-func int16Int32SubSels(xs []int32, ys []int16, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - int32(ys[sel])
-	}
-	return rs
-}
-
-func int8Int32Sub(xs []int32, ys []int8, rs []int32) []int32 {
-	for i := range rs {
-		rs[i] = xs[i] - int32(ys[i])
-	}
-	return rs
-}
-
-func int8Int32SubSels(xs []int32, ys []int8, rs []int32, sels []int64) []int32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - int32(ys[sel])
-	}
-	return rs
-}
-
-func int8Int16Sub(xs []int16, ys []int8, rs []int16) []int16 {
-	for i := range rs {
-		rs[i] = xs[i] - int16(ys[i])
-	}
-	return rs
-}
-
-func int8Int16SubSels(xs []int16, ys []int8, rs []int16, sels []int64) []int16 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - int16(ys[sel])
-	}
-	return rs
-}
-
-func float32Float64Sub(xs []float64, ys []float32, rs []float64) []float64 {
-	for i := range rs {
-		rs[i] = xs[i] - float64(ys[i])
-	}
-	return rs
-}
-
-func float32Float64SubSels(xs []float64, ys []float32, rs []float64, sels []int64) []float64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - float64(ys[sel])
-	}
-	return rs
-}
-
-func uint32Uint64Sub(xs []uint64, ys []uint32, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = xs[i] - uint64(ys[i])
-	}
-	return rs
-}
-
-func uint32Uint64SubSels(xs []uint64, ys []uint32, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - uint64(ys[sel])
-	}
-	return rs
-}
-
-func uint16Uint64Sub(xs []uint64, ys []uint16, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = xs[i] - uint64(ys[i])
-	}
-	return rs
-}
-
-func uint16Uint64SubSels(xs []uint64, ys []uint16, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - uint64(ys[sel])
-	}
-	return rs
-}
-
-func uint8Uint64Sub(xs []uint64, ys []uint8, rs []uint64) []uint64 {
-	for i := range rs {
-		rs[i] = xs[i] - uint64(ys[i])
-	}
-	return rs
-}
-
-func uint8Uint64SubSels(xs []uint64, ys []uint8, rs []uint64, sels []int64) []uint64 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - uint64(ys[sel])
-	}
-	return rs
-}
-
-func uint16Uint32Sub(xs []uint32, ys []uint16, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = xs[i] - uint32(ys[i])
-	}
-	return rs
-}
-
-func uint16Uint32SubSels(xs []uint32, ys []uint16, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - uint32(ys[sel])
-	}
-	return rs
-}
-
-func uint8Uint32Sub(xs []uint32, ys []uint8, rs []uint32) []uint32 {
-	for i := range rs {
-		rs[i] = xs[i] - uint32(ys[i])
-	}
-	return rs
-}
-
-func uint8Uint32SubSels(xs []uint32, ys []uint8, rs []uint32, sels []int64) []uint32 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - uint32(ys[sel])
-	}
-	return rs
-}
-
-func uint8Uint16Sub(xs []uint16, ys []uint8, rs []uint16) []uint16 {
-	for i := range rs {
-		rs[i] = xs[i] - uint16(ys[i])
-	}
-	return rs
-}
-
-func uint8Uint16SubSels(xs []uint16, ys []uint8, rs []uint16, sels []int64) []uint16 {
-	for _, sel := range sels {
-		rs[sel] = xs[sel] - uint16(ys[sel])
+		rs[i] = TBig(ys[sel]) - x
 	}
 	return rs
 }
+*/
 
 func decimal64Sub(xs []types.Decimal64, ys []types.Decimal64, xsScale int32, ysScale int32, rs []types.Decimal64) []types.Decimal64 {
 	if xsScale > ysScale {
@@ -790,10 +303,9 @@ func decimal64SubScalar(x types.Decimal64, ys []types.Decimal64, xScale, ysScale
 		}
 		return rs
 	} else if xScale < ysScale {
-		xScaled := x
 		scaleDiff := ysScale - xScale
 		scale := int64(math.Pow10(int(scaleDiff)))
-		xScaled = types.ScaleDecimal64(x, scale)
+		xScaled := types.ScaleDecimal64(x, scale)
 		for i, y := range ys {
 			rs[i] = types.Decimal64SubAligned(xScaled, y)
 		}
@@ -826,10 +338,9 @@ func decimal64SubByScalar(x types.Decimal64, ys []types.Decimal64, xScale, ysSca
 		}
 		return rs
 	} else if xScale < ysScale {
-		xScaled := x
 		scaleDiff := ysScale - xScale
 		scale := int64(math.Pow10(int(scaleDiff)))
-		xScaled = types.ScaleDecimal64(x, scale)
+		xScaled := types.ScaleDecimal64(x, scale)
 		for i, y := range ys {
 			rs[i] = types.Decimal64SubAligned(y, xScaled)
 		}
diff --git a/pkg/vectorize/sub/sub_amd64.go b/pkg/vectorize/sub/sub_amd64.go
index 319a34b2256cfd5280920145fdb1199f8f5b1d0d..dacdd146bda8b04c87ae7292fd09089896184dbf 100644
--- a/pkg/vectorize/sub/sub_amd64.go
+++ b/pkg/vectorize/sub/sub_amd64.go
@@ -141,96 +141,7 @@ func init() {
 		Float64Sub = float64SubAvx2
 		Float64SubScalar = float64SubScalarAvx2
 		Float64SubByScalar = float64SubByScalarAvx2
-	} else {
-		Int8Sub = int8Sub
-		Int8SubScalar = int8SubScalar
-		Int8SubByScalar = int8SubByScalar
-		Int16Sub = int16Sub
-		Int16SubScalar = int16SubScalar
-		Int16SubByScalar = int16SubByScalar
-		Int32Sub = int32Sub
-		Int32SubScalar = int32SubScalar
-		Int32SubByScalar = int32SubByScalar
-		Int64Sub = int64Sub
-		Int64SubScalar = int64SubScalar
-		Int64SubByScalar = int64SubByScalar
-		Uint8Sub = uint8Sub
-		Uint8SubScalar = uint8SubScalar
-		Uint8SubByScalar = uint8SubByScalar
-		Uint16Sub = uint16Sub
-		Uint16SubScalar = uint16SubScalar
-		Uint16SubByScalar = uint16SubByScalar
-		Uint32Sub = uint32Sub
-		Uint32SubScalar = uint32SubScalar
-		Uint32SubByScalar = uint32SubByScalar
-		Uint64Sub = uint64Sub
-		Uint64SubScalar = uint64SubScalar
-		Uint64SubByScalar = uint64SubByScalar
-		Float32Sub = float32Sub
-		Float32SubScalar = float32SubScalar
-		Float32SubByScalar = float32SubByScalar
-		Float64Sub = float64Sub
-		Float64SubScalar = float64SubScalar
-		Float64SubByScalar = float64SubByScalar
-	}
-
-	Int8SubSels = int8SubSels
-	Int8SubScalarSels = int8SubScalarSels
-	Int8SubByScalarSels = int8SubByScalarSels
-	Int16SubSels = int16SubSels
-	Int16SubScalarSels = int16SubScalarSels
-	Int16SubByScalarSels = int16SubByScalarSels
-	Int32SubSels = int32SubSels
-	Int32SubScalarSels = int32SubScalarSels
-	Int32SubByScalarSels = int32SubByScalarSels
-	Int64SubSels = int64SubSels
-	Int64SubScalarSels = int64SubScalarSels
-	Int64SubByScalarSels = int64SubByScalarSels
-	Uint8SubSels = uint8SubSels
-	Uint8SubScalarSels = uint8SubScalarSels
-	Uint8SubByScalarSels = uint8SubByScalarSels
-	Uint16SubSels = uint16SubSels
-	Uint16SubScalarSels = uint16SubScalarSels
-	Uint16SubByScalarSels = uint16SubByScalarSels
-	Uint32SubSels = uint32SubSels
-	Uint32SubScalarSels = uint32SubScalarSels
-	Uint32SubByScalarSels = uint32SubByScalarSels
-	Uint64SubSels = uint64SubSels
-	Uint64SubScalarSels = uint64SubScalarSels
-	Uint64SubByScalarSels = uint64SubByScalarSels
-	Float32SubSels = float32SubSels
-	Float32SubScalarSels = float32SubScalarSels
-	Float32SubByScalarSels = float32SubByScalarSels
-	Float64SubSels = float64SubSels
-	Float64SubScalarSels = float64SubScalarSels
-	Float64SubByScalarSels = float64SubByScalarSels
-
-	Int32Int64Sub = int32Int64Sub
-	Int32Int64SubSels = int32Int64SubSels
-	Int16Int64Sub = int16Int64Sub
-	Int16Int64SubSels = int16Int64SubSels
-	Int8Int64Sub = int8Int64Sub
-	Int8Int64SubSels = int8Int64SubSels
-	Int16Int32Sub = int16Int32Sub
-	Int16Int32SubSels = int16Int32SubSels
-	Int8Int32Sub = int8Int32Sub
-	Int8Int32SubSels = int8Int32SubSels
-	Int8Int16Sub = int8Int16Sub
-	Int8Int16SubSels = int8Int16SubSels
-	Float32Float64Sub = float32Float64Sub
-	Float32Float64SubSels = float32Float64SubSels
-	Uint32Uint64Sub = uint32Uint64Sub
-	Uint32Uint64SubSels = uint32Uint64SubSels
-	Uint16Uint64Sub = uint16Uint64Sub
-	Uint16Uint64SubSels = uint16Uint64SubSels
-	Uint8Uint64Sub = uint8Uint64Sub
-	Uint8Uint64SubSels = uint8Uint64SubSels
-	Uint16Uint32Sub = uint16Uint32Sub
-	Uint16Uint32SubSels = uint16Uint32SubSels
-	Uint8Uint32Sub = uint8Uint32Sub
-	Uint8Uint32SubSels = uint8Uint32SubSels
-	Uint8Uint16Sub = uint8Uint16Sub
-	Uint8Uint16SubSels = uint8Uint16SubSels
+	}
 }
 
 func int8SubAvx2(xs, ys, rs []int8) []int8 {
diff --git a/pkg/vectorize/sub/sub_arm64.go b/pkg/vectorize/sub/sub_arm64.go
deleted file mode 100644
index 6ea3dc73cf136bd2f59a90696e8d94f79a53742e..0000000000000000000000000000000000000000
--- a/pkg/vectorize/sub/sub_arm64.go
+++ /dev/null
@@ -1,105 +0,0 @@
-// Copyright 2021 Matrix Origin
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package sub
-
-func init() {
-	Int8Sub = int8Sub
-	Int8SubScalar = int8SubScalar
-	Int8SubByScalar = int8SubByScalar
-	Int16Sub = int16Sub
-	Int16SubScalar = int16SubScalar
-	Int16SubByScalar = int16SubByScalar
-	Int32Sub = int32Sub
-	Int32SubScalar = int32SubScalar
-	Int32SubByScalar = int32SubByScalar
-	Int64Sub = int64Sub
-	Int64SubScalar = int64SubScalar
-	Int64SubByScalar = int64SubByScalar
-	Uint8Sub = uint8Sub
-	Uint8SubScalar = uint8SubScalar
-	Uint8SubByScalar = uint8SubByScalar
-	Uint16Sub = uint16Sub
-	Uint16SubScalar = uint16SubScalar
-	Uint16SubByScalar = uint16SubByScalar
-	Uint32Sub = uint32Sub
-	Uint32SubScalar = uint32SubScalar
-	Uint32SubByScalar = uint32SubByScalar
-	Uint64Sub = uint64Sub
-	Uint64SubScalar = uint64SubScalar
-	Uint64SubByScalar = uint64SubByScalar
-	Float32Sub = float32Sub
-	Float32SubScalar = float32SubScalar
-	Float32SubByScalar = float32SubByScalar
-	Float64Sub = float64Sub
-	Float64SubScalar = float64SubScalar
-	Float64SubByScalar = float64SubByScalar
-	Int8SubSels = int8SubSels
-	Int8SubScalarSels = int8SubScalarSels
-	Int8SubByScalarSels = int8SubByScalarSels
-	Int16SubSels = int16SubSels
-	Int16SubScalarSels = int16SubScalarSels
-	Int16SubByScalarSels = int16SubByScalarSels
-	Int32SubSels = int32SubSels
-	Int32SubScalarSels = int32SubScalarSels
-	Int32SubByScalarSels = int32SubByScalarSels
-	Int64SubSels = int64SubSels
-	Int64SubScalarSels = int64SubScalarSels
-	Int64SubByScalarSels = int64SubByScalarSels
-	Uint8SubSels = uint8SubSels
-	Uint8SubScalarSels = uint8SubScalarSels
-	Uint8SubByScalarSels = uint8SubByScalarSels
-	Uint16SubSels = uint16SubSels
-	Uint16SubScalarSels = uint16SubScalarSels
-	Uint16SubByScalarSels = uint16SubByScalarSels
-	Uint32SubSels = uint32SubSels
-	Uint32SubScalarSels = uint32SubScalarSels
-	Uint32SubByScalarSels = uint32SubByScalarSels
-	Uint64SubSels = uint64SubSels
-	Uint64SubScalarSels = uint64SubScalarSels
-	Uint64SubByScalarSels = uint64SubByScalarSels
-	Float32SubSels = float32SubSels
-	Float32SubScalarSels = float32SubScalarSels
-	Float32SubByScalarSels = float32SubByScalarSels
-	Float64SubSels = float64SubSels
-	Float64SubScalarSels = float64SubScalarSels
-	Float64SubByScalarSels = float64SubByScalarSels
-
-	Int32Int64Sub = int32Int64Sub
-	Int32Int64SubSels = int32Int64SubSels
-	Int16Int64Sub = int16Int64Sub
-	Int16Int64SubSels = int16Int64SubSels
-	Int8Int64Sub = int8Int64Sub
-	Int8Int64SubSels = int8Int64SubSels
-	Int16Int32Sub = int16Int32Sub
-	Int16Int32SubSels = int16Int32SubSels
-	Int8Int32Sub = int8Int32Sub
-	Int8Int32SubSels = int8Int32SubSels
-	Int8Int16Sub = int8Int16Sub
-	Int8Int16SubSels = int8Int16SubSels
-	Float32Float64Sub = float32Float64Sub
-	Float32Float64SubSels = float32Float64SubSels
-	Uint32Uint64Sub = uint32Uint64Sub
-	Uint32Uint64SubSels = uint32Uint64SubSels
-	Uint16Uint64Sub = uint16Uint64Sub
-	Uint16Uint64SubSels = uint16Uint64SubSels
-	Uint8Uint64Sub = uint8Uint64Sub
-	Uint8Uint64SubSels = uint8Uint64SubSels
-	Uint16Uint32Sub = uint16Uint32Sub
-	Uint16Uint32SubSels = uint16Uint32SubSels
-	Uint8Uint32Sub = uint8Uint32Sub
-	Uint8Uint32SubSels = uint8Uint32SubSels
-	Uint8Uint16Sub = uint8Uint16Sub
-	Uint8Uint16SubSels = uint8Uint16SubSels
-}
diff --git a/pkg/vectorize/sub/avx2.s b/pkg/vectorize/sub/sub_avx2_amd64.s
similarity index 99%
rename from pkg/vectorize/sub/avx2.s
rename to pkg/vectorize/sub/sub_avx2_amd64.s
index 844599471472266b803b1ede77f2cc50842dac3f..dd6632b9f69edc9536b145629d27a7bcabfd3dc6 100644
--- a/pkg/vectorize/sub/avx2.s
+++ b/pkg/vectorize/sub/sub_avx2_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx2.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/sub/avx512.s b/pkg/vectorize/sub/sub_avx512_amd64.s
similarity index 99%
rename from pkg/vectorize/sub/avx512.s
rename to pkg/vectorize/sub/sub_avx512_amd64.s
index 4f445c23c6fbf937c2e2e236ca81aa94c25ad06b..ad993c2ba08abac1e925b4079f967d189fa8cc7f 100644
--- a/pkg/vectorize/sub/avx512.s
+++ b/pkg/vectorize/sub/sub_avx512_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx512.go -out avx512.s -stubs avx512_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/sum/sum.go b/pkg/vectorize/sum/sum.go
index a4942ac89dfb0bddc3324ee143e0be36012d3085..11b937f85f0c6ff9bc27d2fe266dfaac44340c03 100644
--- a/pkg/vectorize/sum/sum.go
+++ b/pkg/vectorize/sum/sum.go
@@ -14,66 +14,34 @@
 
 package sum
 
+import "golang.org/x/exp/constraints"
+
 var (
-	Int8Sum        func([]int8) int64
-	Int8SumSels    func([]int8, []int64) int64
-	Int16Sum       func([]int16) int64
-	Int16SumSels   func([]int16, []int64) int64
-	Int32Sum       func([]int32) int64
-	Int32SumSels   func([]int32, []int64) int64
-	Int64Sum       func([]int64) int64
-	Int64SumSels   func([]int64, []int64) int64
-	Uint8Sum       func([]uint8) uint64
-	Uint8SumSels   func([]uint8, []int64) uint64
-	Uint16Sum      func([]uint16) uint64
-	Uint16SumSels  func([]uint16, []int64) uint64
-	Uint32Sum      func([]uint32) uint64
-	Uint32SumSels  func([]uint32, []int64) uint64
-	Uint64Sum      func([]uint64) uint64
-	Uint64SumSels  func([]uint64, []int64) uint64
-	Float32Sum     func([]float32) float32
-	Float32SumSels func([]float32, []int64) float32
-	Float64Sum     func([]float64) float64
-	Float64SumSels func([]float64, []int64) float64
+	Int8Sum      = signedSum[int8]
+	Int16Sum     = signedSum[int16]
+	Int32Sum     = signedSum[int32]
+	Int64Sum     = signedSum[int64]
+	Int8SumSels  = signedSumSels[int8]
+	Int16SumSels = signedSumSels[int16]
+	Int32SumSels = signedSumSels[int32]
+	Int64SumSels = signedSumSels[int64]
+
+	Uint8Sum      = unsignedSum[uint8]
+	Uint16Sum     = unsignedSum[uint16]
+	Uint32Sum     = unsignedSum[uint32]
+	Uint64Sum     = unsignedSum[uint64]
+	Uint8SumSels  = unsignedSumSels[uint8]
+	Uint16SumSels = unsignedSumSels[uint16]
+	Uint32SumSels = unsignedSumSels[uint32]
+	Uint64SumSels = unsignedSumSels[uint64]
+
+	Float32Sum     = floatSum[float32]
+	Float64Sum     = floatSum[float64]
+	Float32SumSels = floatSumSels[float32]
+	Float64SumSels = floatSumSels[float64]
 )
 
-func int8Sum(xs []int8) int64 {
-	var res int64
-
-	for _, x := range xs {
-		res += int64(x)
-	}
-	return res
-}
-
-func int8SumSels(xs []int8, sels []int64) int64 {
-	var res int64
-
-	for _, sel := range sels {
-		res += int64(xs[sel])
-	}
-	return res
-}
-
-func int16Sum(xs []int16) int64 {
-	var res int64
-
-	for _, x := range xs {
-		res += int64(x)
-	}
-	return res
-}
-
-func int16SumSels(xs []int16, sels []int64) int64 {
-	var res int64
-
-	for _, sel := range sels {
-		res += int64(xs[sel])
-	}
-	return res
-}
-
-func int32Sum(xs []int32) int64 {
+func signedSum[T constraints.Signed](xs []T) int64 {
 	var res int64
 
 	for _, x := range xs {
@@ -82,7 +50,7 @@ func int32Sum(xs []int32) int64 {
 	return res
 }
 
-func int32SumSels(xs []int32, sels []int64) int64 {
+func signedSumSels[T constraints.Signed](xs []T, sels []int64) int64 {
 	var res int64
 
 	for _, sel := range sels {
@@ -91,25 +59,7 @@ func int32SumSels(xs []int32, sels []int64) int64 {
 	return res
 }
 
-func int64Sum(xs []int64) int64 {
-	var res int64
-
-	for _, x := range xs {
-		res += x
-	}
-	return res
-}
-
-func int64SumSels(xs []int64, sels []int64) int64 {
-	var res int64
-
-	for _, sel := range sels {
-		res += xs[sel]
-	}
-	return res
-}
-
-func uint8Sum(xs []uint8) uint64 {
+func unsignedSum[T constraints.Unsigned](xs []T) uint64 {
 	var res uint64
 
 	for _, x := range xs {
@@ -118,7 +68,7 @@ func uint8Sum(xs []uint8) uint64 {
 	return res
 }
 
-func uint8SumSels(xs []uint8, sels []int64) uint64 {
+func unsignedSumSels[T constraints.Unsigned](xs []T, sels []int64) uint64 {
 	var res uint64
 
 	for _, sel := range sels {
@@ -127,80 +77,8 @@ func uint8SumSels(xs []uint8, sels []int64) uint64 {
 	return res
 }
 
-func uint16Sum(xs []uint16) uint64 {
-	var res uint64
-
-	for _, x := range xs {
-		res += uint64(x)
-	}
-	return res
-}
-
-func uint16SumSels(xs []uint16, sels []int64) uint64 {
-	var res uint64
-
-	for _, sel := range sels {
-		res += uint64(xs[sel])
-	}
-	return res
-}
-
-func uint32Sum(xs []uint32) uint64 {
-	var res uint64
-
-	for _, x := range xs {
-		res += uint64(x)
-	}
-	return res
-}
-
-func uint32SumSels(xs []uint32, sels []int64) uint64 {
-	var res uint64
-
-	for _, sel := range sels {
-		res += uint64(xs[sel])
-	}
-	return res
-}
-
-func uint64Sum(xs []uint64) uint64 {
-	var res uint64
-
-	for _, x := range xs {
-		res += x
-	}
-	return res
-}
-
-func uint64SumSels(xs []uint64, sels []int64) uint64 {
-	var res uint64
-
-	for _, sel := range sels {
-		res += xs[sel]
-	}
-	return res
-}
-
-func float32Sum(xs []float32) float32 {
-	var res float32
-
-	for _, x := range xs {
-		res += x
-	}
-	return res
-}
-
-func float32SumSels(xs []float32, sels []int64) float32 {
-	var res float32
-
-	for _, sel := range sels {
-		res += xs[sel]
-	}
-	return res
-}
-
-func float64Sum(xs []float64) float64 {
-	var res float64
+func floatSum[T constraints.Float](xs []T) T {
+	var res T
 
 	for _, x := range xs {
 		res += x
@@ -208,8 +86,8 @@ func float64Sum(xs []float64) float64 {
 	return res
 }
 
-func float64SumSels(xs []float64, sels []int64) float64 {
-	var res float64
+func floatSumSels[T constraints.Float](xs []T, sels []int64) T {
+	var res T
 
 	for _, sel := range sels {
 		res += xs[sel]
diff --git a/pkg/vectorize/sum/sum_amd64.go b/pkg/vectorize/sum/sum_amd64.go
index 6bf3c1fe3a1dcb959db69ab4c3404209d47f1fb6..3088f9af984dc00f0bd5e26e6d628ce25c4bab66 100644
--- a/pkg/vectorize/sum/sum_amd64.go
+++ b/pkg/vectorize/sum/sum_amd64.go
@@ -12,9 +12,6 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-//go:build amd64
-// +build amd64
-
 package sum
 
 import (
@@ -65,28 +62,7 @@ func init() {
 		Uint64Sum = uint64SumAvx2
 		Float32Sum = float32SumAvx2
 		Float64Sum = float64SumAvx2
-	} else {
-		Int8Sum = int8Sum
-		Int16Sum = int16Sum
-		Int32Sum = int32Sum
-		Int64Sum = int64Sum
-		Uint8Sum = uint8Sum
-		Uint16Sum = uint16Sum
-		Uint32Sum = uint32Sum
-		Uint64Sum = uint64Sum
-		Float32Sum = float32Sum
-		Float64Sum = float64Sum
 	}
-	Int8SumSels = int8SumSels
-	Int16SumSels = int16SumSels
-	Int32SumSels = int32SumSels
-	Int64SumSels = int64SumSels
-	Uint8SumSels = uint8SumSels
-	Uint16SumSels = uint16SumSels
-	Uint32SumSels = uint32SumSels
-	Uint64SumSels = uint64SumSels
-	Float32SumSels = float32SumSels
-	Float64SumSels = float64SumSels
 }
 
 func int8SumAvx2(xs []int8) int64 {
diff --git a/pkg/vectorize/sum/sum_arm64.go b/pkg/vectorize/sum/sum_arm64.go
deleted file mode 100644
index 634740a64ac30b44412b53db59a44e360d3010b6..0000000000000000000000000000000000000000
--- a/pkg/vectorize/sum/sum_arm64.go
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright 2021 Matrix Origin
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//go:build arm64
-// +build arm64
-
-package sum
-
-func init() {
-	Int8Sum = int8Sum
-	Int16Sum = int16Sum
-	Int32Sum = int32Sum
-	Int64Sum = int64Sum
-	Uint8Sum = uint8Sum
-	Uint16Sum = uint16Sum
-	Uint32Sum = uint32Sum
-	Uint64Sum = uint64Sum
-	Float32Sum = float32Sum
-	Float64Sum = float64Sum
-	Int8SumSels = int8SumSels
-	Int16SumSels = int16SumSels
-	Int32SumSels = int32SumSels
-	Int64SumSels = int64SumSels
-	Uint8SumSels = uint8SumSels
-	Uint16SumSels = uint16SumSels
-	Uint32SumSels = uint32SumSels
-	Uint64SumSels = uint64SumSels
-	Float32SumSels = float32SumSels
-	Float64SumSels = float64SumSels
-}
diff --git a/pkg/vectorize/sum/avx2.s b/pkg/vectorize/sum/sum_avx2_amd64.s
similarity index 99%
rename from pkg/vectorize/sum/avx2.s
rename to pkg/vectorize/sum/sum_avx2_amd64.s
index 8db388c30b6e60c19dee7eae6089aa8f3622ea90..c50bc4e3a4410656293b511512c07c0019ab382e 100644
--- a/pkg/vectorize/sum/avx2.s
+++ b/pkg/vectorize/sum/sum_avx2_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx2_gen.go -out avx2.s -stubs avx2_stubs.go. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/sum/avx512.s b/pkg/vectorize/sum/sum_avx512_amd64.s
similarity index 99%
rename from pkg/vectorize/sum/avx512.s
rename to pkg/vectorize/sum/sum_avx512_amd64.s
index 7fb4b21ecd9732a5d95ec8d294163b4c3c0327b0..399cc1031f85953cf3f4c34b9762b8b3073ecb56 100644
--- a/pkg/vectorize/sum/avx512.s
+++ b/pkg/vectorize/sum/sum_avx512_amd64.s
@@ -1,5 +1,4 @@
 // Code generated by command: go run avx512.go -out avx512.s. DO NOT EDIT.
-// +build amd64
 
 #include "textflag.h"
 
diff --git a/pkg/vectorize/sum/sum_test.go b/pkg/vectorize/sum/sum_test.go
index 3debaab2f258da8dfee0949c289ce387624cc3ad..d772494c15431195c41514eb029d34a03a47ffcf 100644
--- a/pkg/vectorize/sum/sum_test.go
+++ b/pkg/vectorize/sum/sum_test.go
@@ -38,11 +38,11 @@ func makeFbuffer(l int) []float64 {
 func TestF64Sum(t *testing.T) {
 	xs := makeFbuffer(10000)
 	fmt.Printf("sum: %v\n", Float64Sum(xs))
-	fmt.Printf("pure sum: %v\n", float64Sum(xs))
+	fmt.Printf("pure sum: %v\n", floatSum(xs))
 }
 
 func TestI64Sum(t *testing.T) {
 	xs := makeIbuffer(10000)
 	fmt.Printf("sum: %v\n", Int64Sum(xs))
-	fmt.Printf("pure sum: %v\n", int64Sum(xs))
+	fmt.Printf("pure sum: %v\n", signedSum(xs))
 }
diff --git a/pkg/vectorize/typecast/typecast.go b/pkg/vectorize/typecast/typecast.go
index 93fd8e0644ba4989f5c86a26e74fbbf04b184998..f8f98740c908f9abc004a6f7864f9609514af5bc 100644
--- a/pkg/vectorize/typecast/typecast.go
+++ b/pkg/vectorize/typecast/typecast.go
@@ -16,3203 +16,164 @@ package typecast
 
 import (
 	"strconv"
+	"unsafe"
 
 	"github.com/matrixorigin/matrixone/pkg/container/types"
+	"golang.org/x/exp/constraints"
+)
 
-	"golang.org/x/sys/cpu"
-)
-
-var (
-	int16ToInt8   func([]int16, []int8) ([]int8, error)
-	int32ToInt8   func([]int32, []int8) ([]int8, error)
-	int64ToInt8   func([]int64, []int8) ([]int8, error)
-	uint8ToInt8   func([]uint8, []int8) ([]int8, error)
-	uint16ToInt8  func([]uint16, []int8) ([]int8, error)
-	uint32ToInt8  func([]uint32, []int8) ([]int8, error)
-	uint64ToInt8  func([]uint64, []int8) ([]int8, error)
-	float32ToInt8 func([]float32, []int8) ([]int8, error)
-	float64ToInt8 func([]float64, []int8) ([]int8, error)
-
-	int8ToInt16    func([]int8, []int16) ([]int16, error)
-	int32ToInt16   func([]int32, []int16) ([]int16, error)
-	int64ToInt16   func([]int64, []int16) ([]int16, error)
-	uint8ToInt16   func([]uint8, []int16) ([]int16, error)
-	uint16ToInt16  func([]uint16, []int16) ([]int16, error)
-	uint32ToInt16  func([]uint32, []int16) ([]int16, error)
-	uint64ToInt16  func([]uint64, []int16) ([]int16, error)
-	float32ToInt16 func([]float32, []int16) ([]int16, error)
-	float64ToInt16 func([]float64, []int16) ([]int16, error)
-
-	int8ToInt32    func([]int8, []int32) ([]int32, error)
-	int16ToInt32   func([]int16, []int32) ([]int32, error)
-	int64ToInt32   func([]int64, []int32) ([]int32, error)
-	uint8ToInt32   func([]uint8, []int32) ([]int32, error)
-	uint16ToInt32  func([]uint16, []int32) ([]int32, error)
-	uint32ToInt32  func([]uint32, []int32) ([]int32, error)
-	uint64ToInt32  func([]uint64, []int32) ([]int32, error)
-	float32ToInt32 func([]float32, []int32) ([]int32, error)
-	float64ToInt32 func([]float64, []int32) ([]int32, error)
-
-	int8ToInt64    func([]int8, []int64) ([]int64, error)
-	int16ToInt64   func([]int16, []int64) ([]int64, error)
-	int32ToInt64   func([]int32, []int64) ([]int64, error)
-	uint8ToInt64   func([]uint8, []int64) ([]int64, error)
-	uint16ToInt64  func([]uint16, []int64) ([]int64, error)
-	uint32ToInt64  func([]uint32, []int64) ([]int64, error)
-	uint64ToInt64  func([]uint64, []int64) ([]int64, error)
-	float32ToInt64 func([]float32, []int64) ([]int64, error)
-	float64ToInt64 func([]float64, []int64) ([]int64, error)
-
-	int8ToUint8    func([]int8, []uint8) ([]uint8, error)
-	int16ToUint8   func([]int16, []uint8) ([]uint8, error)
-	int32ToUint8   func([]int32, []uint8) ([]uint8, error)
-	int64ToUint8   func([]int64, []uint8) ([]uint8, error)
-	uint16ToUint8  func([]uint16, []uint8) ([]uint8, error)
-	uint32ToUint8  func([]uint32, []uint8) ([]uint8, error)
-	uint64ToUint8  func([]uint64, []uint8) ([]uint8, error)
-	float32ToUint8 func([]float32, []uint8) ([]uint8, error)
-	float64ToUint8 func([]float64, []uint8) ([]uint8, error)
-
-	int8ToUint16    func([]int8, []uint16) ([]uint16, error)
-	int16ToUint16   func([]int16, []uint16) ([]uint16, error)
-	int32ToUint16   func([]int32, []uint16) ([]uint16, error)
-	int64ToUint16   func([]int64, []uint16) ([]uint16, error)
-	uint8ToUint16   func([]uint8, []uint16) ([]uint16, error)
-	uint32ToUint16  func([]uint32, []uint16) ([]uint16, error)
-	uint64ToUint16  func([]uint64, []uint16) ([]uint16, error)
-	float32ToUint16 func([]float32, []uint16) ([]uint16, error)
-	float64ToUint16 func([]float64, []uint16) ([]uint16, error)
-
-	int8ToUint32    func([]int8, []uint32) ([]uint32, error)
-	int16ToUint32   func([]int16, []uint32) ([]uint32, error)
-	int32ToUint32   func([]int32, []uint32) ([]uint32, error)
-	int64ToUint32   func([]int64, []uint32) ([]uint32, error)
-	uint8ToUint32   func([]uint8, []uint32) ([]uint32, error)
-	uint16ToUint32  func([]uint16, []uint32) ([]uint32, error)
-	uint64ToUint32  func([]uint64, []uint32) ([]uint32, error)
-	float32ToUint32 func([]float32, []uint32) ([]uint32, error)
-	float64ToUint32 func([]float64, []uint32) ([]uint32, error)
-
-	int8ToUint64    func([]int8, []uint64) ([]uint64, error)
-	int16ToUint64   func([]int16, []uint64) ([]uint64, error)
-	int32ToUint64   func([]int32, []uint64) ([]uint64, error)
-	int64ToUint64   func([]int64, []uint64) ([]uint64, error)
-	uint8ToUint64   func([]uint8, []uint64) ([]uint64, error)
-	uint16ToUint64  func([]uint16, []uint64) ([]uint64, error)
-	uint32ToUint64  func([]uint32, []uint64) ([]uint64, error)
-	float32ToUint64 func([]float32, []uint64) ([]uint64, error)
-	float64ToUint64 func([]float64, []uint64) ([]uint64, error)
-
-	int8ToFloat32    func([]int8, []float32) ([]float32, error)
-	int16ToFloat32   func([]int16, []float32) ([]float32, error)
-	int32ToFloat32   func([]int32, []float32) ([]float32, error)
-	int64ToFloat32   func([]int64, []float32) ([]float32, error)
-	uint8ToFloat32   func([]uint8, []float32) ([]float32, error)
-	uint16ToFloat32  func([]uint16, []float32) ([]float32, error)
-	uint32ToFloat32  func([]uint32, []float32) ([]float32, error)
-	uint64ToFloat32  func([]uint64, []float32) ([]float32, error)
-	float64ToFloat32 func([]float64, []float32) ([]float32, error)
-
-	int8ToFloat64    func([]int8, []float64) ([]float64, error)
-	int16ToFloat64   func([]int16, []float64) ([]float64, error)
-	int32ToFloat64   func([]int32, []float64) ([]float64, error)
-	int64ToFloat64   func([]int64, []float64) ([]float64, error)
-	uint8ToFloat64   func([]uint8, []float64) ([]float64, error)
-	uint16ToFloat64  func([]uint16, []float64) ([]float64, error)
-	uint32ToFloat64  func([]uint32, []float64) ([]float64, error)
-	uint64ToFloat64  func([]uint64, []float64) ([]float64, error)
-	float32ToFloat64 func([]float32, []float64) ([]float64, error)
-
-	bytesToInt8    func(*types.Bytes, []int8) ([]int8, error)
-	int8ToBytes    func([]int8, *types.Bytes) (*types.Bytes, error)
-	bytesToInt16   func(*types.Bytes, []int16) ([]int16, error)
-	int16ToBytes   func([]int16, *types.Bytes) (*types.Bytes, error)
-	bytesToInt32   func(*types.Bytes, []int32) ([]int32, error)
-	int32ToBytes   func([]int32, *types.Bytes) (*types.Bytes, error)
-	bytesToInt64   func(*types.Bytes, []int64) ([]int64, error)
-	int64ToBytes   func([]int64, *types.Bytes) (*types.Bytes, error)
-	bytesToUint8   func(*types.Bytes, []uint8) ([]uint8, error)
-	uint8ToBytes   func([]uint8, *types.Bytes) (*types.Bytes, error)
-	bytesToUint16  func(*types.Bytes, []uint16) ([]uint16, error)
-	uint16ToBytes  func([]uint16, *types.Bytes) (*types.Bytes, error)
-	bytesToUint32  func(*types.Bytes, []uint32) ([]uint32, error)
-	uint32ToBytes  func([]uint32, *types.Bytes) (*types.Bytes, error)
-	bytesToUint64  func(*types.Bytes, []uint64) ([]uint64, error)
-	uint64ToBytes  func([]uint64, *types.Bytes) (*types.Bytes, error)
-	bytesToFloat32 func(*types.Bytes, []float32) ([]float32, error)
-	float32ToBytes func([]float32, *types.Bytes) (*types.Bytes, error)
-	bytesToFloat64 func(*types.Bytes, []float64) ([]float64, error)
-	float64ToBytes func([]float64, *types.Bytes) (*types.Bytes, error)
-
-	decimal64ToDecimal128 func([]types.Decimal64, []types.Decimal128) ([]types.Decimal128, error)
-
-	int8ToDecimal128   func([]int8, []types.Decimal128) ([]types.Decimal128, error)
-	int16ToDecimal128  func([]int16, []types.Decimal128) ([]types.Decimal128, error)
-	int32ToDecimal128  func([]int32, []types.Decimal128) ([]types.Decimal128, error)
-	int64ToDecimal128  func([]int64, []types.Decimal128) ([]types.Decimal128, error)
-	uint8ToDecimal128  func([]uint8, []types.Decimal128) ([]types.Decimal128, error)
-	uint16ToDecimal128 func([]uint16, []types.Decimal128) ([]types.Decimal128, error)
-	uint32ToDecimal128 func([]uint32, []types.Decimal128) ([]types.Decimal128, error)
-	uint64ToDecimal128 func([]uint64, []types.Decimal128) ([]types.Decimal128, error)
-)
-
-func init() {
-	if cpu.X86.HasAVX512 {
-		int16ToInt8 = int16ToInt8Pure
-		int32ToInt8 = int32ToInt8Pure
-		int64ToInt8 = int64ToInt8Pure
-		uint8ToInt8 = uint8ToInt8Pure
-		uint16ToInt8 = uint16ToInt8Pure
-		uint32ToInt8 = uint32ToInt8Pure
-		uint64ToInt8 = uint64ToInt8Pure
-		float32ToInt8 = float32ToInt8Pure
-		float64ToInt8 = float64ToInt8Pure
-
-		int8ToInt16 = int8ToInt16Pure
-		int32ToInt16 = int32ToInt16Pure
-		int64ToInt16 = int64ToInt16Pure
-		uint8ToInt16 = uint8ToInt16Pure
-		uint16ToInt16 = uint16ToInt16Pure
-		uint32ToInt16 = uint32ToInt16Pure
-		uint64ToInt16 = uint64ToInt16Pure
-		float32ToInt16 = float32ToInt16Pure
-		float64ToInt16 = float64ToInt16Pure
-
-		int8ToInt32 = int8ToInt32Pure
-		int16ToInt32 = int16ToInt32Pure
-		int64ToInt32 = int64ToInt32Pure
-		uint8ToInt32 = uint8ToInt32Pure
-		uint16ToInt32 = uint16ToInt32Pure
-		uint32ToInt32 = uint32ToInt32Pure
-		uint64ToInt32 = uint64ToInt32Pure
-		float32ToInt32 = float32ToInt32Pure
-		float64ToInt32 = float64ToInt32Pure
-
-		int8ToInt64 = int8ToInt64Pure
-		int16ToInt64 = int16ToInt64Pure
-		int32ToInt64 = int32ToInt64Pure
-		uint8ToInt64 = uint8ToInt64Pure
-		uint16ToInt64 = uint16ToInt64Pure
-		uint32ToInt64 = uint32ToInt64Pure
-		uint64ToInt64 = uint64ToInt64Pure
-		float32ToInt64 = float32ToInt64Pure
-		float64ToInt64 = float64ToInt64Pure
-
-		int8ToUint8 = int8ToUint8Pure
-		int16ToUint8 = int16ToUint8Pure
-		int32ToUint8 = int32ToUint8Pure
-		int64ToUint8 = int64ToUint8Pure
-		uint16ToUint8 = uint16ToUint8Pure
-		uint32ToUint8 = uint32ToUint8Pure
-		uint64ToUint8 = uint64ToUint8Pure
-		float32ToUint8 = float32ToUint8Pure
-		float64ToUint8 = float64ToUint8Pure
-
-		int8ToUint16 = int8ToUint16Pure
-		int16ToUint16 = int16ToUint16Pure
-		int32ToUint16 = int32ToUint16Pure
-		int64ToUint16 = int64ToUint16Pure
-		uint8ToUint16 = uint8ToUint16Pure
-		uint32ToUint16 = uint32ToUint16Pure
-		uint64ToUint16 = uint64ToUint16Pure
-		float32ToUint16 = float32ToUint16Pure
-		float64ToUint16 = float64ToUint16Pure
-
-		int8ToUint32 = int8ToUint32Pure
-		int16ToUint32 = int16ToUint32Pure
-		int32ToUint32 = int32ToUint32Pure
-		int64ToUint32 = int64ToUint32Pure
-		uint8ToUint32 = uint8ToUint32Pure
-		uint16ToUint32 = uint16ToUint32Pure
-		uint64ToUint32 = uint64ToUint32Pure
-		float32ToUint32 = float32ToUint32Pure
-		float64ToUint32 = float64ToUint32Pure
-
-		int8ToUint64 = int8ToUint64Pure
-		int16ToUint64 = int16ToUint64Pure
-		int32ToUint64 = int32ToUint64Pure
-		int64ToUint64 = int64ToUint64Pure
-		uint8ToUint64 = uint8ToUint64Pure
-		uint16ToUint64 = uint16ToUint64Pure
-		uint32ToUint64 = uint32ToUint64Pure
-		float32ToUint64 = float32ToUint64Pure
-		float64ToUint64 = float64ToUint64Pure
-
-		int8ToFloat32 = int8ToFloat32Pure
-		int16ToFloat32 = int16ToFloat32Pure
-		int32ToFloat32 = int32ToFloat32Pure
-		int64ToFloat32 = int64ToFloat32Pure
-		uint8ToFloat32 = uint8ToFloat32Pure
-		uint16ToFloat32 = uint16ToFloat32Pure
-		uint32ToFloat32 = uint32ToFloat32Pure
-		uint64ToFloat32 = uint64ToFloat32Pure
-		float64ToFloat32 = float64ToFloat32Pure
-
-		int8ToFloat64 = int8ToFloat64Pure
-		int16ToFloat64 = int16ToFloat64Pure
-		int32ToFloat64 = int32ToFloat64Pure
-		int64ToFloat64 = int64ToFloat64Pure
-		uint8ToFloat64 = uint8ToFloat64Pure
-		uint16ToFloat64 = uint16ToFloat64Pure
-		uint32ToFloat64 = uint32ToFloat64Pure
-		uint64ToFloat64 = uint64ToFloat64Pure
-		float32ToFloat64 = float32ToFloat64Pure
-
-		decimal64ToDecimal128 = decimal64ToDecimal128Pure
-
-		int8ToDecimal128 = int8ToDecimal128Pure
-		int16ToDecimal128 = int16ToDecimal128Pure
-		int32ToDecimal128 = int32ToDecimal128Pure
-		int64ToDecimal128 = int64ToDecimal128Pure
-	} else if cpu.X86.HasAVX2 {
-		int16ToInt8 = int16ToInt8Pure
-		int32ToInt8 = int32ToInt8Pure
-		int64ToInt8 = int64ToInt8Pure
-		uint8ToInt8 = uint8ToInt8Pure
-		uint16ToInt8 = uint16ToInt8Pure
-		uint32ToInt8 = uint32ToInt8Pure
-		uint64ToInt8 = uint64ToInt8Pure
-		float32ToInt8 = float32ToInt8Pure
-		float64ToInt8 = float64ToInt8Pure
-
-		int8ToInt16 = int8ToInt16Pure
-		int32ToInt16 = int32ToInt16Pure
-		int64ToInt16 = int64ToInt16Pure
-		uint8ToInt16 = uint8ToInt16Pure
-		uint16ToInt16 = uint16ToInt16Pure
-		uint32ToInt16 = uint32ToInt16Pure
-		uint64ToInt16 = uint64ToInt16Pure
-		float32ToInt16 = float32ToInt16Pure
-		float64ToInt16 = float64ToInt16Pure
-
-		int8ToInt32 = int8ToInt32Pure
-		int16ToInt32 = int16ToInt32Pure
-		int64ToInt32 = int64ToInt32Pure
-		uint8ToInt32 = uint8ToInt32Pure
-		uint16ToInt32 = uint16ToInt32Pure
-		uint32ToInt32 = uint32ToInt32Pure
-		uint64ToInt32 = uint64ToInt32Pure
-		float32ToInt32 = float32ToInt32Pure
-		float64ToInt32 = float64ToInt32Pure
-
-		int8ToInt64 = int8ToInt64Pure
-		int16ToInt64 = int16ToInt64Pure
-		int32ToInt64 = int32ToInt64Pure
-		uint8ToInt64 = uint8ToInt64Pure
-		uint16ToInt64 = uint16ToInt64Pure
-		uint32ToInt64 = uint32ToInt64Pure
-		uint64ToInt64 = uint64ToInt64Pure
-		float32ToInt64 = float32ToInt64Pure
-		float64ToInt64 = float64ToInt64Pure
-
-		int8ToUint8 = int8ToUint8Pure
-		int16ToUint8 = int16ToUint8Pure
-		int32ToUint8 = int32ToUint8Pure
-		int64ToUint8 = int64ToUint8Pure
-		uint16ToUint8 = uint16ToUint8Pure
-		uint32ToUint8 = uint32ToUint8Pure
-		uint64ToUint8 = uint64ToUint8Pure
-		float32ToUint8 = float32ToUint8Pure
-		float64ToUint8 = float64ToUint8Pure
-
-		int8ToUint16 = int8ToUint16Pure
-		int16ToUint16 = int16ToUint16Pure
-		int32ToUint16 = int32ToUint16Pure
-		int64ToUint16 = int64ToUint16Pure
-		uint8ToUint16 = uint8ToUint16Pure
-		uint32ToUint16 = uint32ToUint16Pure
-		uint64ToUint16 = uint64ToUint16Pure
-		float32ToUint16 = float32ToUint16Pure
-		float64ToUint16 = float64ToUint16Pure
-
-		int8ToUint32 = int8ToUint32Pure
-		int16ToUint32 = int16ToUint32Pure
-		int32ToUint32 = int32ToUint32Pure
-		int64ToUint32 = int64ToUint32Pure
-		uint8ToUint32 = uint8ToUint32Pure
-		uint16ToUint32 = uint16ToUint32Pure
-		uint64ToUint32 = uint64ToUint32Pure
-		float32ToUint32 = float32ToUint32Pure
-		float64ToUint32 = float64ToUint32Pure
-
-		int8ToUint64 = int8ToUint64Pure
-		int16ToUint64 = int16ToUint64Pure
-		int32ToUint64 = int32ToUint64Pure
-		int64ToUint64 = int64ToUint64Pure
-		uint8ToUint64 = uint8ToUint64Pure
-		uint16ToUint64 = uint16ToUint64Pure
-		uint32ToUint64 = uint32ToUint64Pure
-		float32ToUint64 = float32ToUint64Pure
-		float64ToUint64 = float64ToUint64Pure
-
-		int8ToFloat32 = int8ToFloat32Pure
-		int16ToFloat32 = int16ToFloat32Pure
-		int32ToFloat32 = int32ToFloat32Pure
-		int64ToFloat32 = int64ToFloat32Pure
-		uint8ToFloat32 = uint8ToFloat32Pure
-		uint16ToFloat32 = uint16ToFloat32Pure
-		uint32ToFloat32 = uint32ToFloat32Pure
-		uint64ToFloat32 = uint64ToFloat32Pure
-		float64ToFloat32 = float64ToFloat32Pure
-
-		int8ToFloat64 = int8ToFloat64Pure
-		int16ToFloat64 = int16ToFloat64Pure
-		int32ToFloat64 = int32ToFloat64Pure
-		int64ToFloat64 = int64ToFloat64Pure
-		uint8ToFloat64 = uint8ToFloat64Pure
-		uint16ToFloat64 = uint16ToFloat64Pure
-		uint32ToFloat64 = uint32ToFloat64Pure
-		uint64ToFloat64 = uint64ToFloat64Pure
-		float32ToFloat64 = float32ToFloat64Pure
-
-		decimal64ToDecimal128 = decimal64ToDecimal128Pure
-
-		int8ToDecimal128 = int8ToDecimal128Pure
-		int16ToDecimal128 = int16ToDecimal128Pure
-		int32ToDecimal128 = int32ToDecimal128Pure
-		int64ToDecimal128 = int64ToDecimal128Pure
-	} else {
-		int16ToInt8 = int16ToInt8Pure
-		int32ToInt8 = int32ToInt8Pure
-		int64ToInt8 = int64ToInt8Pure
-		uint8ToInt8 = uint8ToInt8Pure
-		uint16ToInt8 = uint16ToInt8Pure
-		uint32ToInt8 = uint32ToInt8Pure
-		uint64ToInt8 = uint64ToInt8Pure
-		float32ToInt8 = float32ToInt8Pure
-		float64ToInt8 = float64ToInt8Pure
-
-		int8ToInt16 = int8ToInt16Pure
-		int32ToInt16 = int32ToInt16Pure
-		int64ToInt16 = int64ToInt16Pure
-		uint8ToInt16 = uint8ToInt16Pure
-		uint16ToInt16 = uint16ToInt16Pure
-		uint32ToInt16 = uint32ToInt16Pure
-		uint64ToInt16 = uint64ToInt16Pure
-		float32ToInt16 = float32ToInt16Pure
-		float64ToInt16 = float64ToInt16Pure
-
-		int8ToInt32 = int8ToInt32Pure
-		int16ToInt32 = int16ToInt32Pure
-		int64ToInt32 = int64ToInt32Pure
-		uint8ToInt32 = uint8ToInt32Pure
-		uint16ToInt32 = uint16ToInt32Pure
-		uint32ToInt32 = uint32ToInt32Pure
-		uint64ToInt32 = uint64ToInt32Pure
-		float32ToInt32 = float32ToInt32Pure
-		float64ToInt32 = float64ToInt32Pure
-
-		int8ToInt64 = int8ToInt64Pure
-		int16ToInt64 = int16ToInt64Pure
-		int32ToInt64 = int32ToInt64Pure
-		uint8ToInt64 = uint8ToInt64Pure
-		uint16ToInt64 = uint16ToInt64Pure
-		uint32ToInt64 = uint32ToInt64Pure
-		uint64ToInt64 = uint64ToInt64Pure
-		float32ToInt64 = float32ToInt64Pure
-		float64ToInt64 = float64ToInt64Pure
-
-		int8ToUint8 = int8ToUint8Pure
-		int16ToUint8 = int16ToUint8Pure
-		int32ToUint8 = int32ToUint8Pure
-		int64ToUint8 = int64ToUint8Pure
-		uint16ToUint8 = uint16ToUint8Pure
-		uint32ToUint8 = uint32ToUint8Pure
-		uint64ToUint8 = uint64ToUint8Pure
-		float32ToUint8 = float32ToUint8Pure
-		float64ToUint8 = float64ToUint8Pure
-
-		int8ToUint16 = int8ToUint16Pure
-		int16ToUint16 = int16ToUint16Pure
-		int32ToUint16 = int32ToUint16Pure
-		int64ToUint16 = int64ToUint16Pure
-		uint8ToUint16 = uint8ToUint16Pure
-		uint32ToUint16 = uint32ToUint16Pure
-		uint64ToUint16 = uint64ToUint16Pure
-		float32ToUint16 = float32ToUint16Pure
-		float64ToUint16 = float64ToUint16Pure
-
-		int8ToUint32 = int8ToUint32Pure
-		int16ToUint32 = int16ToUint32Pure
-		int32ToUint32 = int32ToUint32Pure
-		int64ToUint32 = int64ToUint32Pure
-		uint8ToUint32 = uint8ToUint32Pure
-		uint16ToUint32 = uint16ToUint32Pure
-		uint64ToUint32 = uint64ToUint32Pure
-		float32ToUint32 = float32ToUint32Pure
-		float64ToUint32 = float64ToUint32Pure
-
-		int8ToUint64 = int8ToUint64Pure
-		int16ToUint64 = int16ToUint64Pure
-		int32ToUint64 = int32ToUint64Pure
-		int64ToUint64 = int64ToUint64Pure
-		uint8ToUint64 = uint8ToUint64Pure
-		uint16ToUint64 = uint16ToUint64Pure
-		uint32ToUint64 = uint32ToUint64Pure
-		float32ToUint64 = float32ToUint64Pure
-		float64ToUint64 = float64ToUint64Pure
-
-		int8ToFloat32 = int8ToFloat32Pure
-		int16ToFloat32 = int16ToFloat32Pure
-		int32ToFloat32 = int32ToFloat32Pure
-		int64ToFloat32 = int64ToFloat32Pure
-		uint8ToFloat32 = uint8ToFloat32Pure
-		uint16ToFloat32 = uint16ToFloat32Pure
-		uint32ToFloat32 = uint32ToFloat32Pure
-		uint64ToFloat32 = uint64ToFloat32Pure
-		float64ToFloat32 = float64ToFloat32Pure
-
-		int8ToFloat64 = int8ToFloat64Pure
-		int16ToFloat64 = int16ToFloat64Pure
-		int32ToFloat64 = int32ToFloat64Pure
-		int64ToFloat64 = int64ToFloat64Pure
-		uint8ToFloat64 = uint8ToFloat64Pure
-		uint16ToFloat64 = uint16ToFloat64Pure
-		uint32ToFloat64 = uint32ToFloat64Pure
-		uint64ToFloat64 = uint64ToFloat64Pure
-		float32ToFloat64 = float32ToFloat64Pure
-
-		decimal64ToDecimal128 = decimal64ToDecimal128Pure
-
-		int8ToDecimal128 = int8ToDecimal128Pure
-		int16ToDecimal128 = int16ToDecimal128Pure
-		int32ToDecimal128 = int32ToDecimal128Pure
-		int64ToDecimal128 = int64ToDecimal128Pure
-	}
-
-	bytesToInt8 = bytesToInt8Pure
-	int8ToBytes = int8ToBytesPure
-	bytesToInt16 = bytesToInt16Pure
-	int16ToBytes = int16ToBytesPure
-	bytesToInt32 = bytesToInt32Pure
-	int32ToBytes = int32ToBytesPure
-	bytesToInt64 = bytesToInt64Pure
-	int64ToBytes = int64ToBytesPure
-	bytesToUint8 = bytesToUint8Pure
-	uint8ToBytes = uint8ToBytesPure
-	bytesToUint16 = bytesToUint16Pure
-	uint16ToBytes = uint16ToBytesPure
-	bytesToUint32 = bytesToUint32Pure
-	uint32ToBytes = uint32ToBytesPure
-	bytesToUint64 = bytesToUint64Pure
-	uint64ToBytes = uint64ToBytesPure
-	bytesToFloat32 = bytesToFloat32Pure
-	float32ToBytes = float32ToBytesPure
-	bytesToFloat64 = bytesToFloat64Pure
-	float64ToBytes = float64ToBytesPure
-}
-
-func Int16ToInt8(xs []int16, rs []int8) ([]int8, error) {
-	return int16ToInt8(xs, rs)
-}
-
-func int16ToInt8Pure(xs []int16, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToInt8Avx512(xs []int16, rs []int8) ([]int8, error) {
-	n := len(xs) / 8
-	int16ToInt8Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToInt8(xs []int32, rs []int8) ([]int8, error) {
-	return int32ToInt8(xs, rs)
-}
-
-func int32ToInt8Pure(xs []int32, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToInt8Avx512(xs []int32, rs []int8) ([]int8, error) {
-	n := len(xs) / 4
-	int32ToInt8Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToInt8(xs []int64, rs []int8) ([]int8, error) {
-	return int64ToInt8(xs, rs)
-}
-
-func int64ToInt8Pure(xs []int64, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToInt8Avx512(xs []int64, rs []int8) ([]int8, error) {
-	n := len(xs) / 2
-	int64ToInt8Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToInt8(xs []uint8, rs []int8) ([]int8, error) {
-	return uint8ToInt8(xs, rs)
-}
-
-func uint8ToInt8Pure(xs []uint8, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToInt8Avx2(xs []uint8, rs []int8) ([]int8, error) {
-	n := len(xs) / 16
-	uint8ToInt8Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 16, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToInt8Avx512(xs []uint8, rs []int8) ([]int8, error) {
-	n := len(xs) / 16
-	uint8ToInt8Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 16, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToInt8(xs []uint16, rs []int8) ([]int8, error) {
-	return uint16ToInt8(xs, rs)
-}
-
-func uint16ToInt8Pure(xs []uint16, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToInt8Avx512(xs []uint16, rs []int8) ([]int8, error) {
-	n := len(xs) / 8
-	uint16ToInt8Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToInt8(xs []uint32, rs []int8) ([]int8, error) {
-	return uint32ToInt8(xs, rs)
-}
-
-func uint32ToInt8Pure(xs []uint32, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToInt8Avx512(xs []uint32, rs []int8) ([]int8, error) {
-	n := len(xs) / 4
-	uint32ToInt8Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToInt8(xs []uint64, rs []int8) ([]int8, error) {
-	return uint64ToInt8(xs, rs)
-}
-
-func uint64ToInt8Pure(xs []uint64, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToInt8Avx512(xs []uint64, rs []int8) ([]int8, error) {
-	n := len(xs) / 2
-	uint64ToInt8Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToInt8(xs []float32, rs []int8) ([]int8, error) {
-	return float32ToInt8(xs, rs)
-}
-
-func float32ToInt8Pure(xs []float32, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToInt8Avx512(xs []float32, rs []int8) ([]int8, error) {
-	n := len(xs) / 4
-	float32ToInt8Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToInt8(xs []float64, rs []int8) ([]int8, error) {
-	return float64ToInt8(xs, rs)
-}
-
-func float64ToInt8Pure(xs []float64, rs []int8) ([]int8, error) {
-	for i, x := range xs {
-		rs[i] = int8(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToInt8Avx512(xs []float64, rs []int8) ([]int8, error) {
-	n := len(xs) / 2
-	float64ToInt8Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToInt16(xs []int8, rs []int16) ([]int16, error) {
-	return int8ToInt16(xs, rs)
-}
-
-func int8ToInt16Pure(xs []int8, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToInt16Avx2(xs []int8, rs []int16) ([]int16, error) {
-	n := len(xs) / 8
-	int8ToInt16Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToInt16Avx512(xs []int8, rs []int16) ([]int16, error) {
-	n := len(xs) / 8
-	int8ToInt16Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToInt16(xs []int32, rs []int16) ([]int16, error) {
-	return int32ToInt16(xs, rs)
-}
-
-func int32ToInt16Pure(xs []int32, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToInt16Avx512(xs []int32, rs []int16) ([]int16, error) {
-	n := len(xs) / 4
-	int32ToInt16Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToInt16(xs []int64, rs []int16) ([]int16, error) {
-	return int64ToInt16(xs, rs)
-}
-
-func int64ToInt16Pure(xs []int64, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToInt16Avx512(xs []int64, rs []int16) ([]int16, error) {
-	n := len(xs) / 2
-	int64ToInt16Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToInt16(xs []uint8, rs []int16) ([]int16, error) {
-	return uint8ToInt16(xs, rs)
-}
-
-func uint8ToInt16Pure(xs []uint8, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToInt16Avx2(xs []uint8, rs []int16) ([]int16, error) {
-	n := len(xs) / 8
-	uint8ToInt16Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToInt16Avx512(xs []uint8, rs []int16) ([]int16, error) {
-	n := len(xs) / 8
-	uint8ToInt16Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToInt16(xs []uint16, rs []int16) ([]int16, error) {
-	return uint16ToInt16(xs, rs)
-}
-
-func uint16ToInt16Pure(xs []uint16, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToInt16Avx2(xs []uint16, rs []int16) ([]int16, error) {
-	n := len(xs) / 8
-	uint16ToInt16Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-
-func uint16ToInt16Avx512(xs []uint16, rs []int16) ([]int16, error) {
-	n := len(xs) / 8
-	uint16ToInt16Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToInt16(xs []uint32, rs []int16) ([]int16, error) {
-	return uint32ToInt16(xs, rs)
-}
-
-func uint32ToInt16Pure(xs []uint32, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToInt16Avx512(xs []uint32, rs []int16) ([]int16, error) {
-	n := len(xs) / 4
-	uint32ToInt16Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToInt16(xs []uint64, rs []int16) ([]int16, error) {
-	return uint64ToInt16(xs, rs)
-}
-
-func uint64ToInt16Pure(xs []uint64, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToInt16Avx512(xs []uint64, rs []int16) ([]int16, error) {
-	n := len(xs) / 2
-	uint64ToInt16Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToInt16(xs []float32, rs []int16) ([]int16, error) {
-	return float32ToInt16(xs, rs)
-}
-
-func float32ToInt16Pure(xs []float32, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToInt16Avx512(xs []float32, rs []int16) ([]int16, error) {
-	n := len(xs) / 4
-	float32ToInt16Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToInt16(xs []float64, rs []int16) ([]int16, error) {
-	return float64ToInt16(xs, rs)
-}
-
-func float64ToInt16Pure(xs []float64, rs []int16) ([]int16, error) {
-	for i, x := range xs {
-		rs[i] = int16(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToInt16Avx512(xs []float64, rs []int16) ([]int16, error) {
-	n := len(xs) / 2
-	float64ToInt16Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToInt32(xs []int8, rs []int32) ([]int32, error) {
-	return int8ToInt32(xs, rs)
-}
-
-func int8ToInt32Pure(xs []int8, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToInt32Avx2(xs []int8, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	int8ToInt32Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToInt32Avx512(xs []int8, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	int8ToInt32Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToInt32(xs []int16, rs []int32) ([]int32, error) {
-	return int16ToInt32(xs, rs)
-}
-
-func int16ToInt32Pure(xs []int16, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToInt32Avx2(xs []int16, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	int16ToInt32Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-
-func int16ToInt32Avx512(xs []int16, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	int16ToInt32Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToInt32(xs []int64, rs []int32) ([]int32, error) {
-	return int64ToInt32(xs, rs)
-}
-
-func int64ToInt32Pure(xs []int64, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToInt32Avx512(xs []int64, rs []int32) ([]int32, error) {
-	n := len(xs) / 2
-	int64ToInt32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToInt32(xs []uint8, rs []int32) ([]int32, error) {
-	return uint8ToInt32(xs, rs)
-}
-
-func uint8ToInt32Pure(xs []uint8, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToInt32Avx2(xs []uint8, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	uint8ToInt32Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToInt32Avx512(xs []uint8, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	uint8ToInt32Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToInt32(xs []uint16, rs []int32) ([]int32, error) {
-	return uint16ToInt32(xs, rs)
-}
-
-func uint16ToInt32Pure(xs []uint16, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToInt32Avx2(xs []uint16, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	uint16ToInt32Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint16ToInt32Avx512(xs []uint16, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	uint16ToInt32Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToInt32(xs []uint32, rs []int32) ([]int32, error) {
-	return uint32ToInt32(xs, rs)
-}
-
-func uint32ToInt32Pure(xs []uint32, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToInt32Avx2(xs []uint32, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	uint32ToInt32Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint32ToInt32Avx512(xs []uint32, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	uint32ToInt32Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToInt32(xs []uint64, rs []int32) ([]int32, error) {
-	return uint64ToInt32(xs, rs)
-}
-
-func uint64ToInt32Pure(xs []uint64, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToInt32Avx512(xs []uint64, rs []int32) ([]int32, error) {
-	n := len(xs) / 2
-	uint64ToInt32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToInt32(xs []float32, rs []int32) ([]int32, error) {
-	return float32ToInt32(xs, rs)
-}
-
-func float32ToInt32Pure(xs []float32, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToInt32Avx2(xs []float32, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	float32ToInt32Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-
-func float32ToInt32Avx512(xs []float32, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	float32ToInt32Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToInt32(xs []float64, rs []int32) ([]int32, error) {
-	return float64ToInt32(xs, rs)
-}
-
-func float64ToInt32Pure(xs []float64, rs []int32) ([]int32, error) {
-	for i, x := range xs {
-		rs[i] = int32(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToInt32Avx2(xs []float64, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	float64ToInt32Avx2Asm(xs[:n*2], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-
-func float64ToInt32Avx512(xs []float64, rs []int32) ([]int32, error) {
-	n := len(xs) / 4
-	float64ToInt32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = int32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToInt64(xs []int8, rs []int64) ([]int64, error) {
-	return int8ToInt64(xs, rs)
-}
-
-func int8ToInt64Pure(xs []int8, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToInt64Avx2(xs []int8, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	int8ToInt64Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToInt64Avx512(xs []int8, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	int8ToInt64Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToInt64(xs []int16, rs []int64) ([]int64, error) {
-	return int16ToInt64(xs, rs)
-}
-
-func int16ToInt64Pure(xs []int16, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToInt64Avx2(xs []int16, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	int16ToInt64Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func int16ToInt64Avx512(xs []int16, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	int16ToInt64Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToInt64(xs []int32, rs []int64) ([]int64, error) {
-	return int32ToInt64(xs, rs)
-}
-
-func int32ToInt64Pure(xs []int32, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToInt64Avx2(xs []int32, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	int32ToInt64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func int32ToInt64Avx512(xs []int32, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	int32ToInt64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToInt64(xs []uint8, rs []int64) ([]int64, error) {
-	return uint8ToInt64(xs, rs)
-}
-
-func uint8ToInt64Pure(xs []uint8, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToInt64Avx2(xs []uint8, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint8ToInt64Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToInt64Avx512(xs []uint8, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint8ToInt64Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToInt64(xs []uint16, rs []int64) ([]int64, error) {
-	return uint16ToInt64(xs, rs)
-}
-
-func uint16ToInt64Pure(xs []uint16, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToInt64Avx2(xs []uint16, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint16ToInt64Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint16ToInt64Avx512(xs []uint16, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint16ToInt64Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToInt64(xs []uint32, rs []int64) ([]int64, error) {
-	return uint32ToInt64(xs, rs)
-}
-
-func uint32ToInt64Pure(xs []uint32, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToInt64Avx2(xs []uint32, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint32ToInt64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint32ToInt64Avx512(xs []uint32, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint32ToInt64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToInt64(xs []uint64, rs []int64) ([]int64, error) {
-	return uint64ToInt64(xs, rs)
-}
-
-func uint64ToInt64Pure(xs []uint64, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToInt64Avx2(xs []uint64, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint64ToInt64Avx2Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint64ToInt64Avx512(xs []uint64, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	uint64ToInt64Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToInt64(xs []float32, rs []int64) ([]int64, error) {
-	return float32ToInt64(xs, rs)
-}
-
-func float32ToInt64Pure(xs []float32, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToInt64Avx2(xs []float32, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	float32ToInt64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func float32ToInt64Avx512(xs []float32, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	float32ToInt64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToInt64(xs []float64, rs []int64) ([]int64, error) {
-	return float64ToInt64(xs, rs)
-}
-
-func float64ToInt64Pure(xs []float64, rs []int64) ([]int64, error) {
-	for i, x := range xs {
-		rs[i] = int64(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToInt64Avx2(xs []float64, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	float64ToInt64Avx2Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-
-func float64ToInt64Avx512(xs []float64, rs []int64) ([]int64, error) {
-	n := len(xs) / 2
-	float64ToInt64Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = int64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToUint8(xs []int8, rs []uint8) ([]uint8, error) {
-	return int8ToUint8(xs, rs)
-}
-
-func int8ToUint8Pure(xs []int8, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToUint8Avx2(xs []int8, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 16
-	int8ToUint8Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 16, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToUint8Avx512(xs []int8, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 16
-	int8ToUint8Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 16, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToUint8(xs []int16, rs []uint8) ([]uint8, error) {
-	return int16ToUint8(xs, rs)
-}
-
-func int16ToUint8Pure(xs []int16, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToUint8Avx512(xs []int16, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 8
-	int16ToUint8Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToUint8(xs []int32, rs []uint8) ([]uint8, error) {
-	return int32ToUint8(xs, rs)
-}
-
-func int32ToUint8Pure(xs []int32, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToUint8Avx512(xs []int32, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 4
-	int32ToUint8Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToUint8(xs []int64, rs []uint8) ([]uint8, error) {
-	return int64ToUint8(xs, rs)
-}
-
-func int64ToUint8Pure(xs []int64, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToUint8Avx512(xs []int64, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 2
-	int64ToUint8Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToUint8(xs []uint16, rs []uint8) ([]uint8, error) {
-	return uint16ToUint8(xs, rs)
-}
-
-func uint16ToUint8Pure(xs []uint16, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToUint8Avx512(xs []uint16, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 8
-	uint16ToUint8Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToUint8(xs []uint32, rs []uint8) ([]uint8, error) {
-	return uint32ToUint8(xs, rs)
-}
-
-func uint32ToUint8Pure(xs []uint32, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToUint8Avx512(xs []uint32, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 4
-	uint32ToUint8Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToUint8(xs []uint64, rs []uint8) ([]uint8, error) {
-	return uint64ToUint8(xs, rs)
-}
-
-func uint64ToUint8Pure(xs []uint64, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToUint8Avx512(xs []uint64, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 2
-	uint64ToUint8Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToUint8(xs []float32, rs []uint8) ([]uint8, error) {
-	return float32ToUint8(xs, rs)
-}
-
-func float32ToUint8Pure(xs []float32, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToUint8Avx512(xs []float32, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 4
-	float32ToUint8Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToUint8(xs []float64, rs []uint8) ([]uint8, error) {
-	return float64ToUint8(xs, rs)
-}
-
-func float64ToUint8Pure(xs []float64, rs []uint8) ([]uint8, error) {
-	for i, x := range xs {
-		rs[i] = uint8(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToUint8Avx512(xs []float64, rs []uint8) ([]uint8, error) {
-	n := len(xs) / 2
-	float64ToUint8Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint8(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToUint16(xs []int8, rs []uint16) ([]uint16, error) {
-	return int8ToUint16(xs, rs)
-}
-
-func int8ToUint16Pure(xs []int8, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToUint16Avx2(xs []int8, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 8
-	int8ToUint16Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToUint16Avx512(xs []int8, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 8
-	int8ToUint16Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToUint16(xs []int16, rs []uint16) ([]uint16, error) {
-	return int16ToUint16(xs, rs)
-}
-
-func int16ToUint16Pure(xs []int16, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToUint16Avx2(xs []int16, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 8
-	int16ToUint16Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-
-func int16ToUint16Avx512(xs []int16, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 8
-	int16ToUint16Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToUint16(xs []int32, rs []uint16) ([]uint16, error) {
-	return int32ToUint16(xs, rs)
-}
-
-func int32ToUint16Pure(xs []int32, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToUint16Avx512(xs []int32, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 4
-	int32ToUint16Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToUint16(xs []int64, rs []uint16) ([]uint16, error) {
-	return int64ToUint16(xs, rs)
-}
-
-func int64ToUint16Pure(xs []int64, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToUint16Avx512(xs []int64, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 2
-	int64ToUint16Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToUint16(xs []uint8, rs []uint16) ([]uint16, error) {
-	return uint8ToUint16(xs, rs)
-}
-
-func uint8ToUint16Pure(xs []uint8, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToUint16Avx2(xs []uint8, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 8
-	uint8ToUint16Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToUint16Avx512(xs []uint8, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 8
-	uint8ToUint16Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 8, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToUint16(xs []uint32, rs []uint16) ([]uint16, error) {
-	return uint32ToUint16(xs, rs)
-}
-
-func uint32ToUint16Pure(xs []uint32, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToUint16Avx512(xs []uint32, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 4
-	uint32ToUint16Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToUint16(xs []uint64, rs []uint16) ([]uint16, error) {
-	return uint64ToUint16(xs, rs)
-}
-
-func uint64ToUint16Pure(xs []uint64, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToUint16Avx512(xs []uint64, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 2
-	uint64ToUint16Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToUint16(xs []float32, rs []uint16) ([]uint16, error) {
-	return float32ToUint16(xs, rs)
-}
-
-func float32ToUint16Pure(xs []float32, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToUint16Avx512(xs []float32, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 4
-	float32ToUint16Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToUint16(xs []float64, rs []uint16) ([]uint16, error) {
-	return float64ToUint16(xs, rs)
-}
-
-func float64ToUint16Pure(xs []float64, rs []uint16) ([]uint16, error) {
-	for i, x := range xs {
-		rs[i] = uint16(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToUint16Avx512(xs []float64, rs []uint16) ([]uint16, error) {
-	n := len(xs) / 2
-	float64ToUint16Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint16(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToUint32(xs []int8, rs []uint32) ([]uint32, error) {
-	return int8ToUint32(xs, rs)
-}
-
-func int8ToUint32Pure(xs []int8, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToUint32Avx2(xs []int8, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	int8ToUint32Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToUint32Avx512(xs []int8, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	int8ToUint32Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToUint32(xs []int16, rs []uint32) ([]uint32, error) {
-	return int16ToUint32(xs, rs)
-}
-
-func int16ToUint32Pure(xs []int16, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToUint32Avx2(xs []int16, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	int16ToUint32Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-
-func int16ToUint32Avx512(xs []int16, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	int16ToUint32Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToUint32(xs []int32, rs []uint32) ([]uint32, error) {
-	return int32ToUint32(xs, rs)
-}
-
-func int32ToUint32Pure(xs []int32, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToUint32Avx2(xs []int32, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	int32ToUint32Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-
-func int32ToUint32Avx512(xs []int32, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	int32ToUint32Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToUint32(xs []int64, rs []uint32) ([]uint32, error) {
-	return int64ToUint32(xs, rs)
-}
-
-func int64ToUint32Pure(xs []int64, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToUint32Avx512(xs []int64, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 2
-	int64ToUint32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToUint32(xs []uint8, rs []uint32) ([]uint32, error) {
-	return uint8ToUint32(xs, rs)
-}
-
-func uint8ToUint32Pure(xs []uint8, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToUint32Avx2(xs []uint8, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	uint8ToUint32Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToUint32Avx512(xs []uint8, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	uint8ToUint32Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToUint32(xs []uint16, rs []uint32) ([]uint32, error) {
-	return uint16ToUint32(xs, rs)
-}
-
-func uint16ToUint32Pure(xs []uint16, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToUint32Avx2(xs []uint16, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	uint16ToUint32Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint16ToUint32Avx512(xs []uint16, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	uint16ToUint32Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToUint32(xs []uint64, rs []uint32) ([]uint32, error) {
-	return uint64ToUint32(xs, rs)
-}
-
-func uint64ToUint32Pure(xs []uint64, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToUint32Avx512(xs []uint64, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 2
-	uint64ToUint32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToUint32(xs []float32, rs []uint32) ([]uint32, error) {
-	return float32ToUint32(xs, rs)
-}
-
-func float32ToUint32Pure(xs []float32, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToUint32Avx2(xs []float32, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	float32ToUint32Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-
-func float32ToUint32Avx512(xs []float32, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	float32ToUint32Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToUint32(xs []float64, rs []uint32) ([]uint32, error) {
-	return float64ToUint32(xs, rs)
-}
-
-func float64ToUint32Pure(xs []float64, rs []uint32) ([]uint32, error) {
-	for i, x := range xs {
-		rs[i] = uint32(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToUint32Avx2(xs []float64, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	float64ToUint32Avx2Asm(xs[:n*2], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-
-func float64ToUint32Avx512(xs []float64, rs []uint32) ([]uint32, error) {
-	n := len(xs) / 4
-	float64ToUint32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = uint32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToUint64(xs []int8, rs []uint64) ([]uint64, error) {
-	return int8ToUint64(xs, rs)
-}
-
-func int8ToUint64Pure(xs []int8, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToUint64Avx2(xs []int8, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int8ToUint64Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToUint64Avx512(xs []int8, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int8ToUint64Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToUint64(xs []int16, rs []uint64) ([]uint64, error) {
-	return int16ToUint64(xs, rs)
-}
-
-func int16ToUint64Pure(xs []int16, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToUint64Avx2(xs []int16, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int16ToUint64Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func int16ToUint64Avx512(xs []int16, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int16ToUint64Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToUint64(xs []int32, rs []uint64) ([]uint64, error) {
-	return int32ToUint64(xs, rs)
-}
-
-func int32ToUint64Pure(xs []int32, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToUint64Avx2(xs []int32, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int32ToUint64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func int32ToUint64Avx512(xs []int32, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int32ToUint64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToUint64(xs []int64, rs []uint64) ([]uint64, error) {
-	return int64ToUint64(xs, rs)
-}
-
-func int64ToUint64Pure(xs []int64, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToUint64Avx2(xs []int64, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int64ToUint64Avx2Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func int64ToUint64Avx512(xs []int64, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	int64ToUint64Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToUint64(xs []uint8, rs []uint64) ([]uint64, error) {
-	return uint8ToUint64(xs, rs)
-}
-
-func uint8ToUint64Pure(xs []uint8, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToUint64Avx2(xs []uint8, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	uint8ToUint64Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToUint64Avx512(xs []uint8, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	uint8ToUint64Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToUint64(xs []uint16, rs []uint64) ([]uint64, error) {
-	return uint16ToUint64(xs, rs)
-}
-
-func uint16ToUint64Pure(xs []uint16, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToUint64Avx2(xs []uint16, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	uint16ToUint64Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint16ToUint64Avx512(xs []uint16, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	uint16ToUint64Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToUint64(xs []uint32, rs []uint64) ([]uint64, error) {
-	return uint32ToUint64(xs, rs)
-}
-
-func uint32ToUint64Pure(xs []uint32, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToUint64Avx2(xs []uint32, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	uint32ToUint64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint32ToUint64Avx512(xs []uint32, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	uint32ToUint64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToUint64(xs []float32, rs []uint64) ([]uint64, error) {
-	return float32ToUint64(xs, rs)
-}
-
-func float32ToUint64Pure(xs []float32, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToUint64Avx2(xs []float32, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	float32ToUint64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func float32ToUint64Avx512(xs []float32, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	float32ToUint64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToUint64(xs []float64, rs []uint64) ([]uint64, error) {
-	return float64ToUint64(xs, rs)
-}
-
-func float64ToUint64Pure(xs []float64, rs []uint64) ([]uint64, error) {
-	for i, x := range xs {
-		rs[i] = uint64(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToUint64Avx2(xs []float64, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	float64ToUint64Avx2Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-
-func float64ToUint64Avx512(xs []float64, rs []uint64) ([]uint64, error) {
-	n := len(xs) / 2
-	float64ToUint64Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = uint64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToFloat32(xs []int8, rs []float32) ([]float32, error) {
-	return int8ToFloat32(xs, rs)
-}
-
-func int8ToFloat32Pure(xs []int8, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToFloat32Avx2(xs []int8, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	int8ToFloat32Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToFloat32Avx512(xs []int8, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	int8ToFloat32Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToFloat32(xs []int16, rs []float32) ([]float32, error) {
-	return int16ToFloat32(xs, rs)
-}
-
-func int16ToFloat32Pure(xs []int16, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToFloat32Avx2(xs []int16, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	int16ToFloat32Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-
-func int16ToFloat32Avx512(xs []int16, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	int16ToFloat32Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToFloat32(xs []int32, rs []float32) ([]float32, error) {
-	return int32ToFloat32(xs, rs)
-}
-
-func int32ToFloat32Pure(xs []int32, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToFloat32Avx2(xs []int32, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	int32ToFloat32Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-
-func int32ToFloat32Avx512(xs []int32, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	int32ToFloat32Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToFloat32(xs []int64, rs []float32) ([]float32, error) {
-	return int64ToFloat32(xs, rs)
-}
-
-func int64ToFloat32Pure(xs []int64, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToFloat32Avx512(xs []int64, rs []float32) ([]float32, error) {
-	n := len(xs) / 2
-	int64ToFloat32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToFloat32(xs []uint8, rs []float32) ([]float32, error) {
-	return uint8ToFloat32(xs, rs)
-}
-
-func uint8ToFloat32Pure(xs []uint8, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToFloat32Avx2(xs []uint8, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	uint8ToFloat32Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToFloat32Avx512(xs []uint8, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	uint8ToFloat32Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToFloat32(xs []uint16, rs []float32) ([]float32, error) {
-	return uint16ToFloat32(xs, rs)
-}
-
-func uint16ToFloat32Pure(xs []uint16, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToFloat32Avx2(xs []uint16, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	uint16ToFloat32Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint16ToFloat32Avx512(xs []uint16, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	uint16ToFloat32Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToFloat32(xs []uint32, rs []float32) ([]float32, error) {
-	return uint32ToFloat32(xs, rs)
-}
-
-func uint32ToFloat32Pure(xs []uint32, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToFloat32Avx2(xs []uint32, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	uint32ToFloat32Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-
-func uint32ToFloat32Avx512(xs []uint32, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	uint32ToFloat32Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToFloat32(xs []uint64, rs []float32) ([]float32, error) {
-	return uint64ToFloat32(xs, rs)
-}
-
-func uint64ToFloat32Pure(xs []uint64, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToFloat32Avx512(xs []uint64, rs []float32) ([]float32, error) {
-	n := len(xs) / 2
-	uint64ToFloat32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float64ToFloat32(xs []float64, rs []float32) ([]float32, error) {
-	return float64ToFloat32(xs, rs)
-}
-
-func float64ToFloat32Pure(xs []float64, rs []float32) ([]float32, error) {
-	for i, x := range xs {
-		rs[i] = float32(x)
-	}
-	return rs, nil
-}
-
-/*
-func float64ToFloat32Avx2(xs []float64, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	float64ToFloat32Avx2Asm(xs[:n*2], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-
-func float64ToFloat32Avx512(xs []float64, rs []float32) ([]float32, error) {
-	n := len(xs) / 4
-	float64ToFloat32Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 4, len(xs); i < j; i++ {
-		rs[i] = float32(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int8ToFloat64(xs []int8, rs []float64) ([]float64, error) {
-	return int8ToFloat64(xs, rs)
-}
-
-func int8ToFloat64Pure(xs []int8, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int8ToFloat64Avx2(xs []int8, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	int8ToFloat64Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-
-func int8ToFloat64Avx512(xs []int8, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	int8ToFloat64Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int16ToFloat64(xs []int16, rs []float64) ([]float64, error) {
-	return int16ToFloat64(xs, rs)
-}
-
-func int16ToFloat64Pure(xs []int16, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int16ToFloat64Avx2(xs []int16, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	int16ToFloat64Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-
-func int16ToFloat64Avx512(xs []int16, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	int16ToFloat64Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int32ToFloat64(xs []int32, rs []float64) ([]float64, error) {
-	return int32ToFloat64(xs, rs)
-}
-
-func int32ToFloat64Pure(xs []int32, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int32ToFloat64Avx2(xs []int32, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	int32ToFloat64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-
-func int32ToFloat64Avx512(xs []int32, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	int32ToFloat64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Int64ToFloat64(xs []int64, rs []float64) ([]float64, error) {
-	return int64ToFloat64(xs, rs)
-}
-
-func int64ToFloat64Pure(xs []int64, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func int64ToFloat64Avx512(xs []int64, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	int64ToFloat64Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint8ToFloat64(xs []uint8, rs []float64) ([]float64, error) {
-	return uint8ToFloat64(xs, rs)
-}
-
-func uint8ToFloat64Pure(xs []uint8, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint8ToFloat64Avx2(xs []uint8, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	uint8ToFloat64Avx2Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint8ToFloat64Avx512(xs []uint8, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	uint8ToFloat64Avx512Asm(xs[:n*16], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint16ToFloat64(xs []uint16, rs []float64) ([]float64, error) {
-	return uint16ToFloat64(xs, rs)
-}
-
-func uint16ToFloat64Pure(xs []uint16, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint16ToFloat64Avx2(xs []uint16, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	uint16ToFloat64Avx2Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint16ToFloat64Avx512(xs []uint16, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	uint16ToFloat64Avx512Asm(xs[:n*8], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint32ToFloat64(xs []uint32, rs []float64) ([]float64, error) {
-	return uint32ToFloat64(xs, rs)
-}
-
-func uint32ToFloat64Pure(xs []uint32, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint32ToFloat64Avx2(xs []uint32, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	uint32ToFloat64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-
-func uint32ToFloat64Avx512(xs []uint32, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	uint32ToFloat64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Uint64ToFloat64(xs []uint64, rs []float64) ([]float64, error) {
-	return uint64ToFloat64(xs, rs)
-}
-
-func uint64ToFloat64Pure(xs []uint64, rs []float64) ([]float64, error) {
-	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func uint64ToFloat64Avx512(xs []uint64, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	uint64ToFloat64Avx512Asm(xs[:n*2], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func Float32ToFloat64(xs []float32, rs []float64) ([]float64, error) {
-	return float32ToFloat64(xs, rs)
-}
+var (
+	Int16ToInt8   = numericToNumeric[int16, int8]
+	Int32ToInt8   = numericToNumeric[int32, int8]
+	Int64ToInt8   = numericToNumeric[int64, int8]
+	Uint8ToInt8   = numericToNumeric[uint8, int8]
+	Uint16ToInt8  = numericToNumeric[uint16, int8]
+	Uint32ToInt8  = numericToNumeric[uint32, int8]
+	Uint64ToInt8  = numericToNumeric[uint64, int8]
+	Float32ToInt8 = numericToNumeric[float32, int8]
+	Float64ToInt8 = numericToNumeric[float64, int8]
+
+	Int8ToInt16    = numericToNumeric[int8, int16]
+	Int32ToInt16   = numericToNumeric[int32, int16]
+	Int64ToInt16   = numericToNumeric[int64, int16]
+	Uint8ToInt16   = numericToNumeric[uint8, int16]
+	Uint16ToInt16  = numericToNumeric[uint16, int16]
+	Uint32ToInt16  = numericToNumeric[uint32, int16]
+	Uint64ToInt16  = numericToNumeric[uint64, int16]
+	Float32ToInt16 = numericToNumeric[float32, int16]
+	Float64ToInt16 = numericToNumeric[float64, int16]
+
+	Int8ToInt32    = numericToNumeric[int8, int32]
+	Int16ToInt32   = numericToNumeric[int16, int32]
+	Int64ToInt32   = numericToNumeric[int64, int32]
+	Uint8ToInt32   = numericToNumeric[uint8, int32]
+	Uint16ToInt32  = numericToNumeric[uint16, int32]
+	Uint32ToInt32  = numericToNumeric[uint32, int32]
+	Uint64ToInt32  = numericToNumeric[uint64, int32]
+	Float32ToInt32 = numericToNumeric[float32, int32]
+	Float64ToInt32 = numericToNumeric[float64, int32]
+
+	Int8ToInt64    = numericToNumeric[int8, int64]
+	Int16ToInt64   = numericToNumeric[int16, int64]
+	Int32ToInt64   = numericToNumeric[int32, int64]
+	Uint8ToInt64   = numericToNumeric[uint8, int64]
+	Uint16ToInt64  = numericToNumeric[uint16, int64]
+	Uint32ToInt64  = numericToNumeric[uint32, int64]
+	Uint64ToInt64  = numericToNumeric[uint64, int64]
+	Float32ToInt64 = numericToNumeric[float32, int64]
+	Float64ToInt64 = numericToNumeric[float64, int64]
+
+	Int8ToUint8    = numericToNumeric[int8, uint8]
+	Int16ToUint8   = numericToNumeric[int16, uint8]
+	Int32ToUint8   = numericToNumeric[int32, uint8]
+	Int64ToUint8   = numericToNumeric[int64, uint8]
+	Uint16ToUint8  = numericToNumeric[uint16, uint8]
+	Uint32ToUint8  = numericToNumeric[uint32, uint8]
+	Uint64ToUint8  = numericToNumeric[uint64, uint8]
+	Float32ToUint8 = numericToNumeric[float32, uint8]
+	Float64ToUint8 = numericToNumeric[float64, uint8]
+
+	Int8ToUint16    = numericToNumeric[int8, uint16]
+	Int16ToUint16   = numericToNumeric[int16, uint16]
+	Int32ToUint16   = numericToNumeric[int32, uint16]
+	Int64ToUint16   = numericToNumeric[int64, uint16]
+	Uint8ToUint16   = numericToNumeric[uint8, uint16]
+	Uint32ToUint16  = numericToNumeric[uint32, uint16]
+	Uint64ToUint16  = numericToNumeric[uint64, uint16]
+	Float32ToUint16 = numericToNumeric[float32, uint16]
+	Float64ToUint16 = numericToNumeric[float64, uint16]
+
+	Int8ToUint32    = numericToNumeric[int8, uint32]
+	Int16ToUint32   = numericToNumeric[int16, uint32]
+	Int32ToUint32   = numericToNumeric[int32, uint32]
+	Int64ToUint32   = numericToNumeric[int64, uint32]
+	Uint8ToUint32   = numericToNumeric[uint8, uint32]
+	Uint16ToUint32  = numericToNumeric[uint16, uint32]
+	Uint64ToUint32  = numericToNumeric[uint64, uint32]
+	Float32ToUint32 = numericToNumeric[float32, uint32]
+	Float64ToUint32 = numericToNumeric[float64, uint32]
+
+	Int8ToUint64    = numericToNumeric[int8, uint64]
+	Int16ToUint64   = numericToNumeric[int16, uint64]
+	Int32ToUint64   = numericToNumeric[int32, uint64]
+	Int64ToUint64   = numericToNumeric[int64, uint64]
+	Uint8ToUint64   = numericToNumeric[uint8, uint64]
+	Uint16ToUint64  = numericToNumeric[uint16, uint64]
+	Uint32ToUint64  = numericToNumeric[uint32, uint64]
+	Float32ToUint64 = numericToNumeric[float32, uint64]
+	Float64ToUint64 = numericToNumeric[float64, uint64]
+
+	Int8ToFloat32    = numericToNumeric[int8, float32]
+	Int16ToFloat32   = numericToNumeric[int16, float32]
+	Int32ToFloat32   = numericToNumeric[int32, float32]
+	Int64ToFloat32   = numericToNumeric[int64, float32]
+	Uint8ToFloat32   = numericToNumeric[uint8, float32]
+	Uint16ToFloat32  = numericToNumeric[uint16, float32]
+	Uint32ToFloat32  = numericToNumeric[uint32, float32]
+	Uint64ToFloat32  = numericToNumeric[uint64, float32]
+	Float64ToFloat32 = numericToNumeric[float64, float32]
+
+	Int8ToFloat64    = numericToNumeric[int8, float64]
+	Int16ToFloat64   = numericToNumeric[int16, float64]
+	Int32ToFloat64   = numericToNumeric[int32, float64]
+	Int64ToFloat64   = numericToNumeric[int64, float64]
+	Uint8ToFloat64   = numericToNumeric[uint8, float64]
+	Uint16ToFloat64  = numericToNumeric[uint16, float64]
+	Uint32ToFloat64  = numericToNumeric[uint32, float64]
+	Uint64ToFloat64  = numericToNumeric[uint64, float64]
+	Float32ToFloat64 = numericToNumeric[float32, float64]
+
+	BytesToInt8    = bytesToInt[int8]
+	Int8ToBytes    = intToBytes[int8]
+	BytesToInt16   = bytesToInt[int16]
+	Int16ToBytes   = intToBytes[int16]
+	BytesToInt32   = bytesToInt[int32]
+	Int32ToBytes   = intToBytes[int32]
+	BytesToInt64   = bytesToInt[int64]
+	Int64ToBytes   = intToBytes[int64]
+	BytesToUint8   = bytesToInt[uint8]
+	Uint8ToBytes   = intToBytes[uint8]
+	BytesToUint16  = bytesToInt[uint16]
+	Uint16ToBytes  = intToBytes[uint16]
+	BytesToUint32  = bytesToInt[uint32]
+	Uint32ToBytes  = intToBytes[uint32]
+	BytesToUint64  = bytesToInt[uint64]
+	Uint64ToBytes  = intToBytes[uint64]
+	BytesToFloat32 = bytesToFloat[float32]
+	Float32ToBytes = floatToBytes[float32]
+	BytesToFloat64 = bytesToFloat[float64]
+	Float64ToBytes = floatToBytes[float64]
+
+	Decimal64ToDecimal128 = decimal64ToDecimal128Pure
+
+	Int8ToDecimal128  = intToDecimal128[int8]
+	Int16ToDecimal128 = intToDecimal128[int16]
+	Int32ToDecimal128 = intToDecimal128[int32]
+	Int64ToDecimal128 = intToDecimal128[int64]
+)
 
-func float32ToFloat64Pure(xs []float32, rs []float64) ([]float64, error) {
+func numericToNumeric[T1, T2 constraints.Integer | constraints.Float](xs []T1, rs []T2) ([]T2, error) {
 	for i, x := range xs {
-		rs[i] = float64(x)
-	}
-	return rs, nil
-}
-
-/*
-func float32ToFloat64Avx2(xs []float32, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	float32ToFloat64Avx2Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-
-func float32ToFloat64Avx512(xs []float32, rs []float64) ([]float64, error) {
-	n := len(xs) / 2
-	float32ToFloat64Avx512Asm(xs[:n*4], rs)
-	for i, j := n * 2, len(xs); i < j; i++ {
-		rs[i] = float64(xs[i])
-	}
-	return rs, nil
-}
-*/
-
-func BytesToInt8(xs *types.Bytes, rs []int8) ([]int8, error) {
-	return bytesToInt8(xs, rs)
-}
-
-func bytesToInt8Pure(xs *types.Bytes, rs []int8) ([]int8, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 8)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = int8(val)
-	}
-	return rs, nil
-}
-
-func Int8ToBytes(xs []int8, rs *types.Bytes) (*types.Bytes, error) {
-	return int8ToBytes(xs, rs)
-}
-
-func int8ToBytesPure(xs []int8, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendInt(rs.Data, int64(x), 10)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
-	}
-	return rs, nil
-}
-
-func BytesToInt16(xs *types.Bytes, rs []int16) ([]int16, error) {
-	return bytesToInt16(xs, rs)
-}
-
-func bytesToInt16Pure(xs *types.Bytes, rs []int16) ([]int16, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 16)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = int16(val)
-	}
-	return rs, nil
-}
-
-func Int16ToBytes(xs []int16, rs *types.Bytes) (*types.Bytes, error) {
-	return int16ToBytes(xs, rs)
-}
-
-func int16ToBytesPure(xs []int16, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendInt(rs.Data, int64(x), 10)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
-	}
-	return rs, nil
-}
-
-func BytesToInt32(xs *types.Bytes, rs []int32) ([]int32, error) {
-	return bytesToInt32(xs, rs)
-}
-
-func bytesToInt32Pure(xs *types.Bytes, rs []int32) ([]int32, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 32)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = int32(val)
-	}
-	return rs, nil
-}
-
-func Int32ToBytes(xs []int32, rs *types.Bytes) (*types.Bytes, error) {
-	return int32ToBytes(xs, rs)
-}
-
-func int32ToBytesPure(xs []int32, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendInt(rs.Data, int64(x), 10)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
-	}
-	return rs, nil
-}
-
-func BytesToInt64(xs *types.Bytes, rs []int64) ([]int64, error) {
-	return bytesToInt64(xs, rs)
-}
-
-func bytesToInt64Pure(xs *types.Bytes, rs []int64) ([]int64, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 64)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = val
-	}
-	return rs, nil
-}
-
-func Int64ToBytes(xs []int64, rs *types.Bytes) (*types.Bytes, error) {
-	return int64ToBytes(xs, rs)
-}
-
-func int64ToBytesPure(xs []int64, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendInt(rs.Data, x, 10)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
-	}
-	return rs, nil
-}
-
-func BytesToUint8(xs *types.Bytes, rs []uint8) ([]uint8, error) {
-	return bytesToUint8(xs, rs)
-}
-
-func bytesToUint8Pure(xs *types.Bytes, rs []uint8) ([]uint8, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 8)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = uint8(val)
-	}
-	return rs, nil
-}
-
-func Uint8ToBytes(xs []uint8, rs *types.Bytes) (*types.Bytes, error) {
-	return uint8ToBytes(xs, rs)
-}
-
-func uint8ToBytesPure(xs []uint8, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendInt(rs.Data, int64(x), 10)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
-	}
-	return rs, nil
-}
-
-func BytesToUint16(xs *types.Bytes, rs []uint16) ([]uint16, error) {
-	return bytesToUint16(xs, rs)
-}
-
-func bytesToUint16Pure(xs *types.Bytes, rs []uint16) ([]uint16, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 16)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = uint16(val)
-	}
-	return rs, nil
-}
-
-func Uint16ToBytes(xs []uint16, rs *types.Bytes) (*types.Bytes, error) {
-	return uint16ToBytes(xs, rs)
-}
-
-func uint16ToBytesPure(xs []uint16, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendInt(rs.Data, int64(x), 10)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
-	}
-	return rs, nil
-}
-
-func BytesToUint32(xs *types.Bytes, rs []uint32) ([]uint32, error) {
-	return bytesToUint32(xs, rs)
-}
-
-func bytesToUint32Pure(xs *types.Bytes, rs []uint32) ([]uint32, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 32)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = uint32(val)
-	}
-	return rs, nil
-}
-
-func Uint32ToBytes(xs []uint32, rs *types.Bytes) (*types.Bytes, error) {
-	return uint32ToBytes(xs, rs)
-}
-
-func uint32ToBytesPure(xs []uint32, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendInt(rs.Data, int64(x), 10)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
+		rs[i] = T2(x)
 	}
 	return rs, nil
 }
 
-func BytesToUint64(xs *types.Bytes, rs []uint64) ([]uint64, error) {
-	return bytesToUint64(xs, rs)
-}
+func bytesToInt[T constraints.Integer](xs *types.Bytes, rs []T) ([]T, error) {
+	var bitSize = int(unsafe.Sizeof(T(0))) * 8
 
-func bytesToUint64Pure(xs *types.Bytes, rs []uint64) ([]uint64, error) {
 	for i, o := range xs.Offsets {
 		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseInt(s, 10, 64)
+		val, err := strconv.ParseInt(s, 10, bitSize)
 		if err != nil {
 			return nil, err
 		}
-		rs[i] = uint64(val)
+		rs[i] = T(val)
 	}
 	return rs, nil
 }
 
-func Uint64ToBytes(xs []uint64, rs *types.Bytes) (*types.Bytes, error) {
-	return uint64ToBytes(xs, rs)
-}
-
-func uint64ToBytesPure(xs []uint64, rs *types.Bytes) (*types.Bytes, error) {
+func intToBytes[T constraints.Integer](xs []T, rs *types.Bytes) (*types.Bytes, error) {
 	oldLen := uint32(0)
 	for _, x := range xs {
 		rs.Data = strconv.AppendInt(rs.Data, int64(x), 10)
@@ -3224,62 +185,26 @@ func uint64ToBytesPure(xs []uint64, rs *types.Bytes) (*types.Bytes, error) {
 	return rs, nil
 }
 
-func BytesToFloat32(xs *types.Bytes, rs []float32) ([]float32, error) {
-	return bytesToFloat32(xs, rs)
-}
-
-func bytesToFloat32Pure(xs *types.Bytes, rs []float32) ([]float32, error) {
-	for i, o := range xs.Offsets {
-		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseFloat(s, 32)
-		if err != nil {
-			return nil, err
-		}
-		rs[i] = float32(val)
-	}
-	return rs, nil
-}
-
-func Float32ToBytes(xs []float32, rs *types.Bytes) (*types.Bytes, error) {
-	return float32ToBytes(xs, rs)
-}
-
-func float32ToBytesPure(xs []float32, rs *types.Bytes) (*types.Bytes, error) {
-	oldLen := uint32(0)
-	for _, x := range xs {
-		rs.Data = strconv.AppendFloat(rs.Data, float64(x), 'G', -1, 32)
-		newLen := uint32(len(rs.Data))
-		rs.Offsets = append(rs.Offsets, oldLen)
-		rs.Lengths = append(rs.Lengths, newLen-oldLen)
-		oldLen = newLen
-	}
-	return rs, nil
-}
-
-func BytesToFloat64(xs *types.Bytes, rs []float64) ([]float64, error) {
-	return bytesToFloat64(xs, rs)
-}
+func bytesToFloat[T constraints.Float](xs *types.Bytes, rs []T) ([]T, error) {
+	var bitSize = int(unsafe.Sizeof(T(0))) * 8
 
-func bytesToFloat64Pure(xs *types.Bytes, rs []float64) ([]float64, error) {
 	for i, o := range xs.Offsets {
 		s := string(xs.Data[o : o+xs.Lengths[i]])
-		val, err := strconv.ParseFloat(s, 64)
+		val, err := strconv.ParseFloat(s, bitSize)
 		if err != nil {
 			return nil, err
 		}
-		rs[i] = val
+		rs[i] = T(val)
 	}
 	return rs, nil
 }
 
-func Float64ToBytes(xs []float64, rs *types.Bytes) (*types.Bytes, error) {
-	return float64ToBytes(xs, rs)
-}
+func floatToBytes[T constraints.Float](xs []T, rs *types.Bytes) (*types.Bytes, error) {
+	var bitSize = int(unsafe.Sizeof(T(0))) * 8
 
-func float64ToBytesPure(xs []float64, rs *types.Bytes) (*types.Bytes, error) {
 	oldLen := uint32(0)
 	for _, x := range xs {
-		rs.Data = strconv.AppendFloat(rs.Data, x, 'G', -1, 64)
+		rs.Data = strconv.AppendFloat(rs.Data, float64(x), 'G', -1, bitSize)
 		newLen := uint32(len(rs.Data))
 		rs.Offsets = append(rs.Offsets, oldLen)
 		rs.Lengths = append(rs.Lengths, newLen-oldLen)
@@ -3288,10 +213,6 @@ func float64ToBytesPure(xs []float64, rs *types.Bytes) (*types.Bytes, error) {
 	return rs, nil
 }
 
-func Decimal64ToDecimal128(xs []types.Decimal64, rs []types.Decimal128) ([]types.Decimal128, error) {
-	return decimal64ToDecimal128(xs, rs)
-}
-
 func decimal64ToDecimal128Pure(xs []types.Decimal64, rs []types.Decimal128) ([]types.Decimal128, error) {
 	for i, x := range xs {
 		rs[i] = types.Decimal64ToDecimal128(x)
@@ -3299,46 +220,9 @@ func decimal64ToDecimal128Pure(xs []types.Decimal64, rs []types.Decimal128) ([]t
 	return rs, nil
 }
 
-func Int8ToDecimal128(xs []int8, rs []types.Decimal128) ([]types.Decimal128, error) {
-	return int8ToDecimal128(xs, rs)
-}
-
-func int8ToDecimal128Pure(xs []int8, rs []types.Decimal128) ([]types.Decimal128, error) {
-	for i, x := range xs {
-		rs[i] = types.InitDecimal128(int64(x))
-	}
-	return rs, nil
-}
-
-func Int16ToDecimal128(xs []int16, rs []types.Decimal128) ([]types.Decimal128, error) {
-	return int16ToDecimal128(xs, rs)
-}
-
-func int16ToDecimal128Pure(xs []int16, rs []types.Decimal128) ([]types.Decimal128, error) {
-	for i, x := range xs {
-		rs[i] = types.InitDecimal128(int64(x))
-	}
-	return rs, nil
-}
-
-func Int32ToDecimal128(xs []int32, rs []types.Decimal128) ([]types.Decimal128, error) {
-	return int32ToDecimal128(xs, rs)
-}
-
-func int32ToDecimal128Pure(xs []int32, rs []types.Decimal128) ([]types.Decimal128, error) {
+func intToDecimal128[T constraints.Integer](xs []T, rs []types.Decimal128) ([]types.Decimal128, error) {
 	for i, x := range xs {
 		rs[i] = types.InitDecimal128(int64(x))
 	}
 	return rs, nil
 }
-
-func Int64ToDecimal128(xs []int64, rs []types.Decimal128) ([]types.Decimal128, error) {
-	return int64ToDecimal128(xs, rs)
-}
-
-func int64ToDecimal128Pure(xs []int64, rs []types.Decimal128) ([]types.Decimal128, error) {
-	for i, x := range xs {
-		rs[i] = types.InitDecimal128(x)
-	}
-	return rs, nil
-}