shevonWang's Blog

Python 决策树

#!/usr/bin/env python
#-*- coding: utf-8 -*-

from sklearn import tree
import pandas as pd
import pydotplus

# Source workbook holding the raw data; loaded with pd.read_excel.
input_file = '../explore-demo.xls'
# Workbook that receives the integer-encoded data and is then read back in.
output_file = '../data-induction.xlsx'
# Destination of the rendered decision-tree graph (written via write_pdf).
pdf_file = '../model.pdf'

# Convert qualitative (categorical) data into quantitative integer codes.
# Lookup tables: known category label -> integer code, one table per column.
_SALES_TYPE_CODES = {
    u'国产轿车': 0,
    u'进口轿车': 1,
    u'大客车': 2,
    u'其它': 3,
    u'商用货车': 4,
    u'微型面包车': 5,
    u'卡车及轻卡': 6,
}
_SALES_MODE_CODES = {
    u'4S店': 0,
    u'一级代理商': 1,
    u'多品牌经营店': 2,
    u'其它': 3,
}
_OUTPUT_CODES = {
    u'正常': 0,
}


def get_integer(df_columns):
    """Encode a categorical column as integer codes.

    The lookup table is chosen from the Series name:

    * ``销售类型`` -- seven known labels map to 0..6, anything else to 7.
    * ``销售模式`` -- four known labels map to 0..3, anything else to 4.
    * any other name -- ``正常`` maps to 0, anything else to 1.

    Values that are not in the chosen table (including missing values)
    receive that table's fallback code.

    Args:
        df_columns: pandas Series holding the category labels.

    Returns:
        A new integer Series with the same index and name as the input.
        The input Series is left unmodified.
    """
    if df_columns.name == u'销售类型':
        codes, fallback = _SALES_TYPE_CODES, 7
    elif df_columns.name == u'销售模式':
        codes, fallback = _SALES_MODE_CODES, 4
    else:
        codes, fallback = _OUTPUT_CODES, 1

    # map() yields NaN for every label missing from the table; those are
    # exactly the values that belong to the fallback code. Mapping by value
    # (rather than indexing with 0..n-1) also works for any index labels.
    return df_columns.map(codes).fillna(fallback).astype(int)

# Encode the three categorical columns and persist the result to Excel.
df = pd.read_excel(input_file)
for column in (u'销售类型', u'销售模式', u'输出'):
    df[column] = get_integer(df[column])
df.to_excel(output_file)

# Reload the encoded workbook and replace the column labels by position.
# NOTE(review): header=1 makes read_excel use the second sheet row as the
# header, so the first data row written by to_excel appears to be consumed
# as labels (then overwritten below) -- confirm that dropping it is intended.
df = pd.read_excel(output_file, header=1)
df.columns = list(range(17))  # label all 17 columns 0..16
df = df[list(range(2, 17))]  # discard the first two columns
df.columns = list(range(15))  # relabel the remaining columns 0..14

p = 0.8  # fraction of rows used for training
split_at = int(len(df) * p)
train = df[:split_at]  # training data set
test = df[split_at:]  # test data set

# Columns 0..13 are the model inputs; column 14 is the target.
feature_cols = list(range(14))
model = tree.DecisionTreeClassifier().fit(train[feature_cols], train[14])

# Render the fitted tree through Graphviz dot data and save it as a PDF.
dot_model = tree.export_graphviz(model, out_file=None)
graph = pydotplus.graph_from_dot_data(dot_model)
graph.write_pdf(pdf_file)

test1 = test[feature_cols]

print(test)
print(model.predict(test1))