Goal:
This article shows a mapping relationships between SQL data types and Parquet logical types when using Drill to create a parquet file.
Env:
Drill 1.6
Solution:
Refer to this
Drill Doc, below experiment proves the theory.
1. Create a parquet file using Drill
create table dfs.tmp.alltypes
as
select cast('123456789' as bigint) as bigint_col,
cast('xyz' as binary) as binary_col,
cast('true' as boolean) as boolean_col,
cast('1234.56' as DECIMAL(6,2)) as decimal_col,
cast('2015-12-30' as date) as date_col,
cast('123.456' as float) as float_col,
cast('567.789' as double) as double_col,
cast('123' as int) as int_col,
INTERVAL '120' day(3) as interval_col,
cast('22:55:55.23' as TIME) as time_col,
cast('2015-12-30 22:55:55.23' as TIMESTAMP) as timestamp_col,
cast('words' as varchar) as varchar_col,
cast('abc' as char(3)) as char3_col
from sys.version;
2. Examine the parquet logical types using parquet-tool
# java -jar parquet-tools-1.6.1-SNAPSHOT.jar meta /mapr/my2.cluster.com/tmp/alltypes/0_0_0.parquet
file: file:/mapr/my2.cluster.com/tmp/alltypes/0_0_0.parquet
creator: parquet-mr version 1.8.1-drill-r0 (build 6b605a4ea05b66e1a6bf843353abcb4834a4ced8)
extra: drill.version = 1.6.0
file schema: root
--------------------------------------------------------------------------------
bigint_col: REQUIRED INT64 R:0 D:0
binary_col: REQUIRED BINARY R:0 D:0
boolean_col: REQUIRED BOOLEAN R:0 D:0
decimal_col: REQUIRED INT32 O:DECIMAL R:0 D:0
date_col: REQUIRED INT32 O:DATE R:0 D:0
float_col: REQUIRED FLOAT R:0 D:0
double_col: REQUIRED DOUBLE R:0 D:0
int_col: REQUIRED INT32 R:0 D:0
interval_col: REQUIRED FIXED_LEN_BYTE_ARRAY O:INTERVAL R:0 D:0
time_col: REQUIRED INT32 O:TIME_MILLIS R:0 D:0
timestamp_col: REQUIRED INT64 O:TIMESTAMP_MILLIS R:0 D:0
varchar_col: REQUIRED BINARY O:UTF8 R:0 D:0
char3_col: REQUIRED BINARY O:UTF8 R:0 D:0
row group 1: RC:1 TS:541 OFFSET:4
--------------------------------------------------------------------------------
bigint_col: INT64 SNAPPY DO:0 FPO:4 SZ:51/49/0.96 VC:1 ENC:PLAIN,BIT_PACKED
binary_col: BINARY SNAPPY DO:0 FPO:55 SZ:40/38/0.95 VC:1 ENC:PLAIN,BIT_PACKED
boolean_col: BOOLEAN SNAPPY DO:0 FPO:95 SZ:30/28/0.93 VC:1 ENC:PLAIN,BIT_PACKED
decimal_col: INT32 SNAPPY DO:0 FPO:125 SZ:39/37/0.95 VC:1 ENC:PLAIN,BIT_PACKED
date_col: INT32 SNAPPY DO:0 FPO:164 SZ:39/37/0.95 VC:1 ENC:PLAIN,BIT_PACKED
float_col: FLOAT SNAPPY DO:0 FPO:203 SZ:39/37/0.95 VC:1 ENC:PLAIN,BIT_PACKED
double_col: DOUBLE SNAPPY DO:0 FPO:242 SZ:51/49/0.96 VC:1 ENC:PLAIN,BIT_PACKED
int_col: INT32 SNAPPY DO:0 FPO:293 SZ:39/37/0.95 VC:1 ENC:PLAIN,BIT_PACKED
interval_col: FIXED_LEN_BYTE_ARRAY SNAPPY DO:0 FPO:332 SZ:63/61/0.97 VC:1 ENC:PLAIN,BIT_PACKED
time_col: INT32 SNAPPY DO:0 FPO:395 SZ:39/37/0.95 VC:1 ENC:PLAIN,BIT_PACKED
timestamp_col: INT64 SNAPPY DO:0 FPO:434 SZ:51/49/0.96 VC:1 ENC:PLAIN,BIT_PACKED
varchar_col: BINARY SNAPPY DO:0 FPO:485 SZ:46/44/0.96 VC:1 ENC:PLAIN,BIT_PACKED
char3_col: BINARY SNAPPY DO:0 FPO:531 SZ:40/38/0.95 VC:1 ENC:PLAIN,BIT_PACKED
No comments:
Post a Comment