工作日志-数据预处理

目标:

处理启明项目数据库中的数据,并将有用的字段处理成便于开展数据挖掘工作的形式,以XML文本形式输出保存。

github repository 地址:https://github.com/Simonic512/QiMingProject

日志:

19/11/2015早:

编写了连接数据库的相关代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import java.sql.*;
import java.util.ArrayList;

/**
* Created by simon on 15/11/19.
*/

public class ConMySQL{
static ArrayList<TrackClass> track = new ArrayList<>();

public static void main(String[] args){
// 驱动程序名
String driver = "com.mysql.jdbc.Driver";
// URL指向要访问的数据库名scutcs
//String url = "jdbc:mysql://localhost:3306/MyData";
String url = "jdbc:mysql://192.168.1.115/qiming";
// MySQL配置时的用户名
String user = "root";
// MySQL配置时的密码
String password = "mysql";
try {
// 加载驱动程序
Class.forName(driver);

// 连接数据库
Connection conn = DriverManager.getConnection(url, user, password);

if(!conn.isClosed())
System.out.println("Succeeded connecting to the Database!");

// statement用来执行SQL语句
Statement statement = conn.createStatement();

// 要执行的SQL语句
String sql = "select * from hy_track_201505";
//String sql = "select * from Text";
// 结果集
ResultSet rs = statement.executeQuery(sql);

System.out.println("Succeeded reading the Database!");

ArrayList<TrackClass> trackClassArrayList = DB2Array.ToArray(rs);

Array2XML.ToXML(trackClassArrayList);

rs.close();
conn.close();

} catch(ClassNotFoundException e) {
System.out.println("Sorry,can`t find the Driver!");
e.printStackTrace();
} catch(SQLException e) {
e.printStackTrace();
} catch(Exception e) {
e.printStackTrace();
}
}

}

19/11/2015 晚

完成读取数据库到集合,从集合保存到XML的简单操作代码,具体细节需要根据数据字典进行修改。

DB to ArrayList :

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;

/**
* Created by simon on 15/11/19.
*/

public class DB2Array{
public static ArrayList<TrackClass> ToArray(ResultSet rs) throws SQLException {
ArrayList<TrackClass> track = new ArrayList<>();
int CID;
int year;

while(rs.next()) {

TrackClass trackClass = new TrackClass();
CID = new Integer(rs.getString("CID"));
trackClass.setCID(CID);
year = new Integer(rs.getString("T_Status"));
trackClass.setYear(year);


// 首先使用ISO-8859-1字符集将name解码为字节序列并将结果存储新的字节数组中。
// 然后使用GB2312字符集解码指定的字节数组
// name = new String(name.getBytes("ISO-8859-1"),"GB2312");

// 输出结果
track.add(trackClass);
System.out.println(rs.getString("CID") + "\t" + trackClass.getCID());
System.out.println(rs.getString("T_Status")+ "\t" + trackClass.getYear());

}
return track;
}
}

ArrayList to XML

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import org.dom4j.Document;
import org.dom4j.DocumentHelper;
import org.dom4j.Element;
import org.dom4j.io.OutputFormat;
import org.dom4j.io.XMLWriter;

import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;

/**
* Created by simon on 15/11/19.
*/

public class Array2XML {

public static void ToXML(ArrayList<TrackClass> list) throws IOException {
Document document = DocumentHelper.createDocument();
Element root = DocumentHelper.createElement("Track");
document.setRootElement(root);
//给根节点添加属性
root.addAttribute("DataBaseName", "WL_Track201505").addAttribute("Date", "18/11/2015");
Iterator<TrackClass> iter = list.iterator();
while(iter.hasNext()) {
TrackClass track = iter.next();
int CID = track.getCID();
int year = track.getYear();

//给根节点添加孩子节点
Element element = root.addElement("Car"+" ");
element.addElement("CID").addText(String.valueOf(CID));
element.addElement("YEAR").addText(String.valueOf(year));
}

//把生成的xml文档存放在硬盘上 true代表非紧凑输出
OutputFormat format = new OutputFormat(" ",true);
format.setEncoding("GBK");//设置编码格式
XMLWriter xmlWriter = new XMLWriter(new FileOutputStream("Person.xml"),format);

xmlWriter.write(document);
try {
xmlWriter.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}

(水)