diff --git a/pkg/sql/colexec/external/external.go b/pkg/sql/colexec/external/external.go index 13a992a5c5ce96a950210e689bb51bf51ec0e68e..13fca549e269196f2a49ac6e13c450f8e5ebd1ef 100644 --- a/pkg/sql/colexec/external/external.go +++ b/pkg/sql/colexec/external/external.go @@ -452,7 +452,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro for rowIdx := 0; rowIdx < plh.batchSize; rowIdx++ { Line = plh.simdCsvLineArray[rowIdx] if len(Line) < len(param.Attrs) { - return nil, errors.New("the table colnum is larger than input data colnum") + return nil, errors.New("the table column is larger than input data column") } for colIdx := range param.Attrs { field := Line[param.Name2ColIndex[param.Attrs[colIdx]]] @@ -475,7 +475,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro } else if field == "false" || field == "0" { cols[rowIdx] = false } else { - return nil, fmt.Errorf("the input value '%s' is not bool type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%s' is not bool type for column %d", field, colIdx) } } case types.T_int8: @@ -487,14 +487,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseInt(field, 10, 8) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int8 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int8 type for column %d", field, colIdx) } cols[rowIdx] = int8(d) } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < math.MinInt8 || d > math.MaxInt8 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int8 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int8 type for column %d", field, colIdx) } cols[rowIdx] = int8(d) } @@ -508,14 +508,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseInt(field, 10, 16) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int16 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int16 type for column %d", field, colIdx) } cols[rowIdx] = int16(d) } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < math.MinInt16 || d > math.MaxInt16 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int16 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int16 type for column %d", field, colIdx) } cols[rowIdx] = int16(d) } @@ -529,14 +529,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseInt(field, 10, 32) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int32 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int32 type for column %d", field, colIdx) } cols[rowIdx] = int32(d) } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < math.MinInt32 || d > math.MaxInt32 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int32 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int32 type for column %d", field, colIdx) } cols[rowIdx] = int32(d) } @@ -550,14 +550,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseInt(field, 10, 64) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int64 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int64 type for column %d", field, colIdx) } cols[rowIdx] = d } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < math.MinInt64 || d > math.MaxInt64 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not int64 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not int64 type for column %d", field, colIdx) } cols[rowIdx] = int64(d) } @@ -571,14 +571,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseUint(field, 10, 8) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint8 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint8 type for column %d", field, colIdx) } cols[rowIdx] = uint8(d) } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < 0 || d > math.MaxUint8 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint8 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint8 type for column %d", field, colIdx) } cols[rowIdx] = uint8(d) } @@ -592,14 +592,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseUint(field, 10, 16) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint16 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint16 type for column %d", field, colIdx) } cols[rowIdx] = uint16(d) } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < 0 || d > math.MaxUint16 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint16 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint16 type for column %d", field, colIdx) } cols[rowIdx] = uint16(d) } @@ -613,14 +613,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseUint(field, 10, 32) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint32 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint32 type for column %d", field, colIdx) } cols[rowIdx] = uint32(d) } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < 0 || d > math.MaxUint32 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint32 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint32 type for column %d", field, colIdx) } cols[rowIdx] = uint32(d) } @@ -634,14 +634,14 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseUint(field, 10, 64) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint64 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint64 type for column %d", field, colIdx) } cols[rowIdx] = d } else { d, err := strconv.ParseFloat(field, 64) if err != nil || d < 0 || d > math.MaxUint64 { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not uint64 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not uint64 type for column %d", field, colIdx) } cols[rowIdx] = uint64(d) } @@ -654,7 +654,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseFloat(field, 32) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not float32 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not float32 type for column %d", field, colIdx) } cols[rowIdx] = float32(d) } @@ -666,11 +666,11 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := strconv.ParseFloat(field, 32) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not float64 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not float64 type for column %d", field, colIdx) } cols[rowIdx] = d } - case types.T_char, types.T_varchar, types.T_json: + case types.T_char, types.T_varchar: vBytes := vec.Col.(*types.Bytes) if isNullOrEmpty { nulls.Add(vec.Nsp, uint64(rowIdx)) @@ -679,6 +679,27 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro vBytes.Data = append(vBytes.Data, field...) vBytes.Lengths[rowIdx] = uint32(len(field)) } + case types.T_json: + vBytes := vec.Col.(*types.Bytes) + if isNullOrEmpty { + nulls.Add(vec.Nsp, uint64(rowIdx)) + vBytes.Offsets[rowIdx] = uint32(len(vBytes.Data)) + vBytes.Lengths[rowIdx] = uint32(len(field)) + } else { + vBytes.Offsets[rowIdx] = uint32(len(vBytes.Data)) + byteJson, err := types.ParseStringToByteJson(field) + if err != nil { + logutil.Errorf("parse field[%v] err:%v", field, err) + return nil, fmt.Errorf("the input value '%v' is not json type for column %d", field, colIdx) + } + jsonBytes, err := types.EncodeJson(byteJson) + if err != nil { + logutil.Errorf("encode json[%v] err:%v", field, err) + return nil, fmt.Errorf("the input value '%v' is not json type for column %d", field, colIdx) + } + vBytes.Data = append(vBytes.Data, jsonBytes...) + vBytes.Lengths[rowIdx] = uint32(len(jsonBytes)) + } case types.T_date: cols := vec.Col.([]types.Date) if isNullOrEmpty { @@ -687,7 +708,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := types.ParseDate(field) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not Date type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not Date type for column %d", field, colIdx) } cols[rowIdx] = d } @@ -699,7 +720,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := types.ParseDatetime(field, vec.Typ.Precision) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not Datetime type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not Datetime type for column %d", field, colIdx) } cols[rowIdx] = d } @@ -713,7 +734,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro // we tolerate loss of digits. if !moerr.IsMoErrCode(err, moerr.DATA_TRUNCATED) { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not Decimal64 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not Decimal64 type for column %d", field, colIdx) } } cols[rowIdx] = d @@ -728,7 +749,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro // we tolerate loss of digits. if !moerr.IsMoErrCode(err, moerr.DATA_TRUNCATED) { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not Decimal128 type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not Decimal128 type for column %d", field, colIdx) } } cols[rowIdx] = d @@ -741,7 +762,7 @@ func GetBatchData(param *ExternalParam, plh *ParseLineHandler, proc *process.Pro d, err := types.ParseTimestamp(time.UTC, field, vec.Typ.Precision) if err != nil { logutil.Errorf("parse field[%v] err:%v", field, err) - return nil, fmt.Errorf("the input value '%v' is not Timestamp type for colnum %d", field, colIdx) + return nil, fmt.Errorf("the input value '%v' is not Timestamp type for column %d", field, colIdx) } cols[rowIdx] = d } diff --git a/test/cases/dtype/json.test b/test/cases/dtype/json.test index 9f7fd612dcc0b3b2520aecc46882b443f48b08c3..1d4b147c6a61f9a1f51458e7c0eeec832fe09635 100644 --- a/test/cases/dtype/json.test +++ b/test/cases/dtype/json.test @@ -17,4 +17,20 @@ update t1 set a='{"t1":"qaq"}' where b=1; delete from t1 where b=2; select * from t1; delete from t1 where b=1; -select * from t1; \ No newline at end of file +select * from t1; + +#Test cases of load and export +drop table if exists t2; +create table t2 (a json,b int); +delete from t1; +insert into t1 values ('{"t1":"a"}',1),('{"t1":"b"}',2); +select * from t1 into outfile '/tmp/tmp_xadadwhfefef.csv' fields enclosed by '"' force_quote(a); +load data infile '/tmp/tmp_xadadwhfefef.csv' into table t1 ignore 1 lines; +load data infile '/tmp/tmp_xadadwhfefef.csv' into table t2 ignore 1 lines; +select * from t1; +select * from t2; +select * from t1 into outfile '/tmp/tmp_daiufwihwidhqiwfhei.csv' fields terminated by '\t'; +load data local infile '/tmp/tmp_daiufwihwidhqiwfhei.csv' into table t1 fields terminated by '\t' ignore 1 lines; +load data local infile '/tmp/tmp_daiufwihwidhqiwfhei.csv' into table t2 fields terminated by '\t' ignore 1 lines; +select * from t1; +select * from t2; \ No newline at end of file diff --git a/test/result/dtype/json.result b/test/result/dtype/json.result index e0f14638e5cbe15714afedc26a55a125a0512517..b578ac1a42b13edeb4657d7cb5c685cfdf333c26 100644 --- a/test/result/dtype/json.result +++ b/test/result/dtype/json.result @@ -18,4 +18,44 @@ a b {"t1": "qaq"} 1 delete from t1 where b=1; select * from t1; -a b \ No newline at end of file +a b +drop table if exists t2; +create table t2 (a json,b int); +delete from t1; +insert into t1 values ('{"t1":"a"}',1),('{"t1":"b"}',2); +select * from t1 into outfile '/tmp/tmp_xadadwhfefef.csv' fields enclosed by '"' force_quote(a); +a b +load data infile '/tmp/tmp_xadadwhfefef.csv' into table t1 ignore 1 lines; +load data infile '/tmp/tmp_xadadwhfefef.csv' into table t2 ignore 1 lines; +select * from t1; +a b +{"t1": "a"} 1 +{"t1": "b"} 2 +{"t1": "a"} 1 +{"t1": "b"} 2 +select * from t2; +a b +{"t1": "a"} 1 +{"t1": "b"} 2 +select * from t1 into outfile '/tmp/tmp_daiufwihwidhqiwfhei.csv' fields terminated by '\t'; +a b +load data local infile '/tmp/tmp_daiufwihwidhqiwfhei.csv' into table t1 fields terminated by '\t' ignore 1 lines; +load data local infile '/tmp/tmp_daiufwihwidhqiwfhei.csv' into table t2 fields terminated by '\t' ignore 1 lines; +select * from t1; +a b +{"t1": "a"} 1 +{"t1": "b"} 2 +{"t1": "a"} 1 +{"t1": "b"} 2 +{"t1": "a"} 1 +{"t1": "b"} 2 +{"t1": "a"} 1 +{"t1": "b"} 2 +select * from t2; +a b +{"t1": "a"} 1 +{"t1": "b"} 2 +{"t1": "a"} 1 +{"t1": "b"} 2 +{"t1": "a"} 1 +{"t1": "b"} 2