决策树算法Java实现示例(二)

2014-11-24 08:24:16 · 作者: · 浏览: 1
.getAttribute(attrNames[attrIndex]); Map> split = curSplits.get(attrValue); if (split == null) { split = new HashMap>(); curSplits.put(attrValue, split); } List splitSamples = split.get(category); if (splitSamples == null) { splitSamples = new LinkedList(); split.put(category, splitSamples); } splitSamples.add(sample); } allCount += samples.size(); } // 计算将当前属性作为测试属性的情况下在各分支确定新样本的分类需要的信息量之和 double curValue = 0.0; // 计数器:累加各分支 for (Map> splits : curSplits.values()) { double perSplitCount = 0; for (List list : splits.values()) perSplitCount += list.size(); // 累计当前分支样本数 double perSplitValue = 0.0; // 计数器:当前分支 for (List list : splits.values()) { double p = list.size() / perSplitCount; perSplitValue -= p * (Math.log(p) / Math.log(2)); } curValue += (perSplitCount / allCount) * perSplitValue; } // 选取最小为最优 if (minValue > curValue) { minIndex = attrIndex; minValue = curValue; minSplits = curSplits; } } return new Object[] { minIndex, minValue, minSplits }; } /** * 将决策树输出到标准输出 */ static void outputDecisionTree(Object obj, int level, Object from) { for (int i = 0; i < level; i++) System.out.print("|-----"); if (from != null) System.out.printf("(%s):", from); if (obj instanceof Tree) { Tree tree = (Tree) obj; String attrName = tree.getAttribute(); System.out.printf("[%s = ]\n", attrName); for (Object attrValue : tree.getAttributeva lues()) { Object child = tree.getChild(attrValue); outputDecisionTree(child, level + 1, attrName + " = " + attrValue); } } else { System.out.printf("[CATEGORY = %s]\n", obj); } } /** * 样本,包含多个属性和一个指明样本所属分类的分类值 */ static class Sample { private Map
attributes = new HashMap(); private Object category; public Object getAttribute(String name) { return attributes.get(name); } public void setAttribute(String name, Object value) { attributes.put(name, value); } public Object getCategory() { return category; } public void setCategory(Object category) { this.category = category; } public String toString() { return attributes.toString(); } } /** * 决策树(非叶结点),决策树中的每个非叶结点都引导了一棵决策树 * 每个非叶结点包含一个分支属性和多个分支,分支属性的每个值对应一个分支,该分支引导了一棵子决策树 */ static class Tree { private String attribute; private Map children = new HashMap(); public Tree(String attribute) { this.attribute = attribute; } public String getAttribute() { return attribute; } public Object getChild(Object attrValue) { return children.get(attrValue); } public void setChild(Object attrValue, Object child) { children.put(attrValue, child); } public Set getAttributeva lues() { return children.keySet(); } } }