97%
93%
导入时间(sec)
2440
438
673
3926
导入速度(MB/sec)
7.13
39.74
30.43
5.21
4.3单点的vertica与infobright数据查询测试
trace_htlorder表6千万行数据
#常用查询 WHERE BETWEEN AND LIKE ORDER BY
sql01 = "SELECTTextData,StartTime,EndTime,DatabaseName,ObjectName,HostName,ApplicationName,LoginName,Duration,Reads,Writes,CPU,RowCounts,Error,HashCodeFROM trace_htlorder WHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime< '2014-05-15 02:00:00 ' AND TextData LIKE '%GROUP BY%' ORDER BY StartTime"
total: 457.178461075
total: 2516.28422379
#常用查询 WHERE BETWEEN AND LIKE GROUP BY
sql02 = "SELECTTextData,StartTime,EndTime,DatabaseName ,ObjectName,HostName,ApplicationName,LoginName,Duration,Reads,Writes,CPU,RowCounts,Error,HashCodeFROM trace_htlorder WHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime< '2014-05-15 02:00:00 ' AND TextData LIKE '%GROUP BY%' GROUP BY TextData,StartTime,EndTime,DatabaseName,ObjectName,HostName,ApplicationName,LoginName,Duration,Reads,Writes,CPU,RowCounts,Error,HashCode"
total: 278.15408802
total: 3426.65513086
#全字段,无条件 一千万行
sql03 = "SELECT TextData,TransactionID,HostName ,ApplicationName,LoginName,SPID,Duration,StartTime,EndTime,Reads,Writes,CPU ,Success,ServerName,EventClass,Error,ObjectName,DatabaseName,DBUserName,RowCoun
ts,XactSequence,hashcode,filter FROMtrace_htlorder limit 100000"
total: 60.5239160061
total: 26.4451019764
#个别字段,无条件 一千万行
sql04= "SELECT HostName,ApplicationName,ServerName,DatabaseName,DBUserName FROM trace_htlorder limit100000"
total: 3.23667478561
total: 3.97934985161
#全字段,WHERE BETWEEN AND LIKE GROUP BY
sql05 = "SELECT TextData,TransactionID,HostName ,ApplicationName,LoginName,SPID,Duration,StartTime,EndTime,Reads,Writes,CPU ,Success,ServerName,EventClass,Error,ObjectName,DatabaseName,DBUserName,RowCoun
ts,XactSequence,hashcode,filter FROMtrace_htlorder WHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime <'2014-05-15 02:00:00 ' AND TextData LIKE '%GRUOP BY%' GROUP BY TextData,TransactionID ,HostName
,ApplicationName,LoginName,SPID,Duration,StartTime,EndTime,Reads,Writes,CPU ,Success,ServerName,EventClass,Error,ObjectName,DatabaseName,DBUserName,RowCounts,XactSequence,hashcode,filter"
total: 58.3177318573
total: 1042.12857699
#个别字段,WHERE BETWEEN AND LIKE GROUP BY
sql06 = "SELECT HostName,ApplicationName,ServerName,DatabaseName,DBUserName FROM trace_htlorder WHEREStartTime >= '2014-05-15 00:00:00 ' AND StartTime < '2014-05-15 02:00:00' AND TextData LIKE '%GRUOP BY%' GROUPBY HostName ,ApplicationName,ServerName,DatabaseName,DBUserName"
total: 53.3943109512
total: 987.122587919
#全字段,WHERE BETWEEN AND LIKE ORDER BY 分别按不同字段排序DESC/ASC
sql07 = "SELECT TextData,TransactionID,HostName,ApplicationName,LoginName,SPID,Duration,StartTime,EndTime,Reads,Writes ,CPU,Success,ServerName,EventClass,Error,ObjectName,DatabaseName,DBUserName,RowCoun
ts,XactSequence,hashcode,filter FROMtrace_htlorder WHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime <'2014-05-15 02:00:00 ' AND TextData LIKE '%GRUOP BY%' ORDER BY Duration DESC"
total: 39.8180880547
total: 984.204402924
sql08 = "SELECT TextData,TransactionID,HostName ,ApplicationName,LoginName,SPID,Duration,StartTime,EndTime,Reads,Writes,CPU ,Success,ServerName,EventClass,Error,ObjectName,DatabaseName,DBUserName,RowCoun
ts,XactSequence,hashcode,filter FROMtrace_htlorder WHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime <'2014-05-15 02:00:00 ' AND TextData LIKE '%GRUOP BY%' ORDER BY Duration ASC"
total: 40.1778831482
total: 984.4428339
sql09 = "SELECT TextData,TransactionID,HostName,ApplicationName,LoginName,SPID,Duration,StartTime,EndTime,Reads,Writes ,CPU,Success,ServerName,EventClass,Error,ObjectName, DatabaseName,DBUserName,RowCoun
ts,XactSequence,hashcode,filter FROMtrace_htlorder WHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime <'2014-05-15 02:00:00 ' AND TextData LIKE '%GRUOP BY%' ORDER BY SPID DESC"
total: 39.5487890244
total: 984.184374094
sql10 = "SELECT TextData,TransactionID,HostName,ApplicationName,LoginName,SPID,Duration,StartTime,EndTime,Reads,Writes ,CPU,Success,ServerName,EventClass,Error,ObjectName,DatabaseName,DBUserName,RowCoun
ts,XactSequence,hashcode,filter FROM trace_htlorderWHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime < '2014-05-1502:00:00 ' AND TextData LIKE '%GRUOP BY%' ORDER BY SPID ASC"
total: 39.7879989147
total: 984.206639051
#个别字段,WHERE BETWEEN AND LIKE ORDER BY 分别按不同字段排序DESC/ASC
sql11 = "SELECTHostName,ApplicationName,ServerName,DatabaseName,DBUserName FROM trace_htlorderWHERE StartTime >= '2014-05-15 00:00:00 ' AND StartTime < '2014-05-1502:00:00 ' AND TextData LIKE '%GRUOP BY%' ORDER BY Duration DESC;"
total: 48.5239658356
total: 990.392697096
sql12 = "SELECT HostName,ApplicationName,ServerName,DatabaseName,DBUserName FROM trace_htlorder WHEREStartTime >= '2014-05-15 00:00:00 ' AND StartTime < '2014-05-15 02:00:00' AND TextData LIKE '%GRUOP BY%' ORDERBY Duration ASC"
total: 48.5928411484
total: 990.359231949
sql13 = "SELECT HostName,ApplicationName,ServerName,DatabaseName,DBUserName FROM trace_htlorder WHEREStartTime >= '2014-05-15 00:00:00 ' AND StartTime < '2014-05-15 02:00:00' AND TextData LIKE '%GRUOP BY%' ORDERBY SPID DESC"
total: 48.5275650024
total: 990.474725962
sql14 = "SELECT HostName,ApplicationName,ServerName,DatabaseName,DBUserName FROM trace_htlorder WHEREStartTime >= '2014-05-15 00:00:00 ' AND StartTime < '2014-05-15 02:00:00' AND TextData LIKE '%GRUOP BY%' ORDERBY SPID ASC"
total: 48.8557629585
total: 990.433614969
total为无间断连续运行十次的总时间,上边为vertica的值,下边为infobright的值
14条语句各运行十遍的总时间:
vertica:1264.63847303
infobright:15901.3139489
结论
对于包含like、groupby、order by等条件的数据 ,vertica查询速度明显快于infobright。最快可达20倍以上。对于没有查询条件的语句两者相当,查询列为全部字段时vertica劣于infobright。查询列为部分字段时vertica略优于infobright。
注意事项
vertica中 longvarchar 不支持 LIKE操作,只有char,varchar,binary,varbinary支持,所以需先将原long varchar改为varchar
alter table trace_htlorder alter column TextDataset data type varchar(65000);
infobright SQL 中不能包含关键字 如 database,host,writes,reads等,关键字需用反引号括起来,若是在linux shell里直接以命令方式执行还需对反引号转义,否则会被解析为命令。
4.4测试包含三个节点的verticacluster(k-safety设置为1)导入数据
查看cluster状态
dmintools -t view_cluster
DB | Host | State
-------------+------+-------
testVertica | ALL | UP
查看cluster中的节点
admintools -t list_allnodes;
Node | Host | State | Version | DB
------------------------+---------------+-------+-------------------+-------------
v_testvertica_node0001 | 192.X.X.A | UP | vertica-7.0.1.000 | testVertica
v_testvertica_node0003 | 192.X.X.B | UP | vertica-7.0.1.000 | testVertica
v_testvertica_node0004 | 192.X.X.C | UP | vertica-7.0.1.000 | testVertica
测试用的数据库表
databases:testVertica
schema:public
table:sbtest、trace_htlorder
测试只在v_testvertica_node0001节点导入数据后的数据分布情况
数据:sbtest.txt -17G-2亿行
copy public.sbtest from '/data/tmp/sbtest.txt' onv_testvertica_node0001 DELIMITER'*' ENCLOSED BY '"' DIRECT;
Time: First fetch (1 row): 233226.446 ms. Allrows formatted: 233226.528 ms
原来单节点时时间的一般左右,数据被压缩后各节点上大约有2.5G总共2.5*3=7.5G刚好是原来3.8G的两倍左右,证明vertica在cluster内存储了两个副本以保证值为1的k-safety
数据:trace_order_less.txt -20G-3千万行
copy public.trace_htlorder from '/data/tmp/trace_order_less.txt'on v_testvertica_node0001 DELIMITER'*' ENCLOSED BY '"' DIRECT;
Time: First fetch (1 row): 1927267.476 ms. Allrows formatted: 1927267.623 ms
原来单节点时时间的一般左右,数据被压缩后各节点上大约有1G总共1*3=3G刚好是原来1.4G的两倍左右,证明vertica在cluster内存储了两个副本以保证值为1的k-safety
测试在v_testvertica_node0001、 v_testvertica_node0003、 v_testvertica_node0004多节点并行导入
数据:sbtest.txt -17G-2亿行,分别存在于A、B、C三个节点中
copy public.sbtest from '/data/tmp/sbtest.txt' onv_testvertica_node0001,'/tmp/sbtest.txt' on v_testvertica_node0003,'/tmp/sbtest.txt'on v_testvertica_node0004 DELIMITER '*' ENCLOSED BY '"' DIRECT;
Time: First fetch (1 row): 364039.015 ms. Allrows formatted: 364039.062 ms
6亿行数据被导入cluster用时6.3分钟
数据:trace_order_less.txt -20G-3千万行,别存在于A、B、C三个节点中
copy public.trace_htlorder from'/data/tmp/trace_order_less.txt' onv_testvertica_node0001,'/tmp/trace_order_less.txt' onv_testvertica_node0003,'/tmp/trace_order_less.txt'