C++ Hadoop实战备忘(二)
ount
cd wordcount
sudo gedit wordcount.cpp
[cpp]
#include <algorithm>
#include <limits>
#include <string>
#include <vector>
#include"stdint.h"
#include"hadoop/Pipes.hh"
#include"hadoop/TemplateFactory.hh"
#include"hadoop/StringUtils.hh"
using namespace std;
// NOTE: the page extraction spliced the property text of job_config.xml
// (step 7 of this tutorial) into the middle of this class. It is not C++ and
// is preserved here only so that nothing is lost:
//   mapred.job.name                = WordCount
//   mapred.reduce.tasks            = 10
//   mapred.task.timeout            = 180000
//   hadoop.pipes.executable        = /user/hadoop/bin/wordcount
//       (Executable path is given as "path#executable-name")
//   mapred.create.symlink          = yes
//   hadoop.pipes.java.recordreader = true
//   hadoop.pipes.java.recordwriter = true

// Map side of the word count job.
// For every input value it receives, splits the text on a single space and
// emits one (word, "1") pair per resulting piece.
class WordCountMapper:public HadoopPipes::Mapper
{
public:
    // The task context is accepted for the factory's sake but not used.
    WordCountMapper(HadoopPipes::TaskContext& context){}

    // context: supplies the current input value and receives the emitted pairs.
    void map(HadoopPipes::MapContext& context)
    {
        const std::string& line = context.getInputValue();
        // NOTE(review): how splitString treats consecutive separators or an
        // empty line is not visible here - confirm against StringUtils.hh.
        std::vector<std::string> word = HadoopUtils::splitString(line, " ");
        for (unsigned int i = 0; i < word.size(); ++i)
        {
            // The count is passed as text because emit() is called with two strings.
            context.emit(word[i], HadoopUtils::toString(1));
        }
    }
};
// Reduce side of the word count job.
// Adds up every value delivered for the current key (each value is parsed as
// an integer) and emits the key together with the total rendered as text.
class WordCountReducer:public HadoopPipes::Reducer
{
public:
    // The task context is accepted but not used.
    WordCountReducer(HadoopPipes::TaskContext& context){}

    // context: iterates over the values of one key and receives the result pair.
    void reduce(HadoopPipes::ReduceContext& context)
    {
        int total = 0;
        // nextValue() is polled before each read; the loop ends when it returns false.
        for (; context.nextValue(); )
        {
            const int occurrences = HadoopUtils::toInt(context.getInputValue());
            total = total + occurrences;
        }
        const std::string& key = context.getInputKey();
        context.emit(key, HadoopUtils::toString(total));
    }
};
int main(int argc, char **argv)
{
return HadoopPipes::runTask(HadoopPipes::TemplateFactory());
}
2、创建Makefile编译文件
sudo gedit Makefile
[plain]
CC = g++
HADOOP_INSTALL = /usr/local/hadoop
PLATFORM = Linux-i386-32
CPPFLAGS = -m32 -I$(HADOOP_INSTALL)/c++/$(PLATFORM)/include
LIBS = -L$(HADOOP_INSTALL)/c++/$(PLATFORM)/lib -lhadooppipes -lhadooputils -lpthread
wordcount: wordcount.cpp
	$(CC) $(CPPFLAGS) $< -Wall $(LIBS) -g -O2 -o $@
3、运行编译文件
make
4、上传运行程序到hdfs
hadoop fs -mkdir bin
hadoop fs -put wordcount bin
5、准备测试文件
sudo gedit myfile.txt
文件内容:1 2 3 4 5 6 7 8 9 10 11 12 13。
6、上传测试文件
hadoop fs -mkdir input
hadoop fs -put myfile.txt input
7、编写配置文件
sudo gedit job_config.xml
[plain]
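<?xml version="1.0"?>
<configuration>
  <property>
    <name>mapred.job.name</name>
    <value>WordCount</value>
  </property>
  <property>
    <name>mapred.reduce.tasks</name>
    <value>10</value>
  </property>
  <property>
    <name>mapred.task.timeout</name>
    <value>180000</value>
  </property>
  <property>
    <name>hadoop.pipes.executable</name>
    <value>/user/hadoop/bin/wordcount</value>
    <description>Executable path is given as "path#executable-name"
      so that the executable will have a symlink in working directory.
      This can be used for gdb debugging etc.
    </description>
  </property>
  <property>
    <name>mapred.create.symlink</name>
    <value>yes</value>
  </property>
  <property>
    <name>hadoop.pipes.java.recordreader</name>
    <value>true</value>
  </property>
  <property>
    <name>hadoop.pipes.java.recordwriter</name>
    <value>true</value>
  </property>
</configuration>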
8、运行任务
hadoop fs -rmr output
hadoop pipes -conf job_config.xml -input input/myfile.txt -output output -program bin/wordcount
9、查看结果
hadoop fs -ls output
hadoop fs -cat output/part-00000
hadoop fs -cat output/part-00001
结果格式如下
[plain]
1 1
12 1
13 1
2 1
3 1
4 1
5 1