Oracle 快速load数据的方法--SQL*Loader

2014-11-24 18:58:42 · 作者: · 浏览: 7

准备数据文件


在内存的vector中生成1KW条记录,然后写到指定的数据文件中


#include
#include


#include
#include
using namespace std;


#include "clocktool.h"


typedef struct User_Record
{
char id[16];
char name[32];
char passwd[16];
}USER_RECORD;



int main(int argc, char** argv)
{
int iTimes = 10000000;


vector vec;
TimeTool cktool;


User_Record* pRecord = NULL;
cktool.begin();
for (int i = 0; i < iTimes; ++i)
{
pRecord = new User_Record;
memset(pRecord, 0, sizeof(struct User_Record));
sprintf(pRecord->id, "%d", i);
sprintf(pRecord->name, "%ld", 18600000000+i);
strcpy(pRecord->passwd, "111111");
vec.push_back(pRecord);
}
cktool.end();
printf("cost %f\n", cktool.getInterval());


cktool.reset();


cktool.begin();
FILE* pf = fopen("./load.data", "w");
if (pf)
{
char buff[256];
int iResult = 0;
for (int i = 0; i < iTimes; ++i)
{
memset(buff, 0, sizeof(buff));
iResult = sprintf(buff, "%s, %s, %s\n", vec[i]->id, vec[i]->name, vec[i]->passwd);
fwrite(buff, 1, iResult, pf);
}
fclose(pf);
}
cktool.end();
printf("cost %f\n", cktool.getInterval());


vector::iterator it = vec.begin();
for (; it != vec.end(); ++it)
{
delete (*it);
}
return 0;
}


执行结果:


cost 2.796853
cost 3.715381


通过SQL*Loader加载数据


编写load.ctl文件


load data
infile 'load.data'
into table base_users
fields terminated by ","
(userid, username, passwd)


清除原有数据


SQL> truncate table base_users;


对于大数据量的加载,追求效率的时候建议关闭log


SQL> alter table base_users nologging;


运行load加载命令


sqlldr userid=cat/cat control=load.ctl log=load.log direct=y



通过测试发现其中direct=y非常重要,大幅提升了数据加载的效率。检查load.log日志可以发现执行时间大概就是在12秒左右


Total logical records skipped: 0
Total logical records read: 10000000
Total logical records rejected: 0
Total logical records discarded: 0
Total stream buffers loaded by SQL*Loader main thread: 2204
Total stream buffers loaded by SQL*Loader load thread: 0



Run began on Sat Mar 31 22:47:07 2012
Run ended on Sat Mar 31 22:47:19 2012



Elapsed time was: 00:00:11.73
CPU time was: 00:00:05.31