package IRdll;


import java.io.File;
import java.io.Reader;
import java.io.FileInputStream;
import java.io.*;
import java.util.Date;
import java.util.HashSet;
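/**
* <p>Title: Java Chinese word segmentation interface</p>
* <p>Description: This component is built on top of the word segmentation system
* from Harbin Institute of Technology (HIT).
* It is provided for study and research purposes only; anyone using it commercially
* bears the legal consequences themselves, and the component's author is not liable.</p>
* <p>Copyright: Copyright (c) 2006</p>
* <p>Company: Dalian University of Technology</p>
* @author :yezheng
* @version 1.0
*/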

public class IRSplit {

    /*
     * Singleton wrapper around the native "IRdll" library. The three native
     * methods below are implemented in that library; everything else in this
     * class is plain Java that feeds text to the native split(String) call.
     *
     * No synchronization is performed anywhere in this class, so callers that
     * share the singleton between threads must serialize access themselves.
     */

    /** Line terminator appended after every processed input line. */
    private static final String LINE_END = "\r\n";

    /** Lazily created singleton; reset to null by {@link #ReleaseSeggers()}. */
    private static IRSplit instance = null;

    // The native library must be loaded before any native method is invoked.
    static {
        System.loadLibrary("IRdll");
    }

    /**
     * Private constructor: prints a progress message, calls the native
     * LoadSegRes() and prints a completion message.
     */
    private IRSplit() {
        System.out.println("正在加载词典……");
        this.LoadSegRes();
        System.out.println("加载结束");
    }

    /**
     * Returns the shared instance, creating it on first use (or on first use
     * after {@link #ReleaseSeggers()} cleared it).
     *
     * @return the singleton instance
     */
    public static IRSplit getInstance() {
        if (instance == null) {
            instance = new IRSplit();
        }
        return instance;
    }

    // Native methods (implemented in the IRdll library).
    // NOTE(review): presumably LoadSegRes loads the segmenter's dictionary and
    // ReleaseSegger frees it, judging by the names and the constructor's
    // messages - confirm against the native sources.
    private native void LoadSegRes();

    private native void ReleaseSegger();

    private native String split(String sentence);

    /**
     * Segments a single sentence by passing it to the native split call.
     *
     * @param sentence text to segment; may be null
     * @return the native result, or "" when {@code sentence} is null or empty
     */
    public String splitSentence(String sentence) {
        // Null must be tested first; the original order dereferenced null.
        if (sentence == null || sentence.length() < 1) {
            return "";
        }
        return split(sentence);
    }

    /**
     * Segments a long text piece by piece. The text is cut after every
     * character whose Unicode general category is
     * {@link Character#OTHER_PUNCTUATION}; each piece (punctuation included)
     * is passed to the native split call and the results are concatenated in
     * order. Any trailing text without closing punctuation is processed last.
     *
     * @param sentence text to segment; may be null
     * @return concatenated native results, or "" when {@code sentence} is
     *         null or empty
     */
    public String splitLongSentence(String sentence) {
        if (sentence == null || sentence.length() < 1) {
            return "";
        }
        StringBuilder segmented = new StringBuilder();
        int length = sentence.length();
        int pieceStart = 0;
        for (int i = 0; i < length; i++) {
            if (Character.getType(sentence.charAt(i)) == Character.OTHER_PUNCTUATION) {
                int pieceEnd = i + 1; // keep the punctuation mark with its piece
                segmented.append(split(sentence.substring(pieceStart, pieceEnd)));
                pieceStart = pieceEnd;
            }
        }
        if (pieceStart < length) {
            segmented.append(split(sentence.substring(pieceStart, length)));
        }
        return segmented.toString();
    }

    /**
     * Clears the singleton reference and then calls the native
     * ReleaseSegger(). A later {@link #getInstance()} creates a new instance.
     */
    public void ReleaseSeggers() {
        instance = null;
        ReleaseSegger();
    }

    /**
     * Segments a file line by line with {@link #splitLongSentence(String)}
     * and writes the result to {@code outfile}, terminating every line
     * (including empty ones) with CR LF. The whole input is processed before
     * the output file is opened, so a read failure leaves {@code outfile}
     * untouched. Both files use the platform default charset. Errors are
     * reported on standard output and not rethrown.
     *
     * @param file    input file
     * @param outfile output file (overwritten)
     */
    public void splitFile(File file, File outfile) {
        try {
            StringBuilder result = new StringBuilder();
            BufferedReader br =
                new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    if (line.length() != 0) {
                        result.append(splitLongSentence(line));
                    }
                    result.append(LINE_END);
                }
            } finally {
                br.close(); // also closes the underlying FileInputStream
            }

            FileWriter writer = new FileWriter(outfile);
            try {
                writer.write(result.toString());
            } finally {
                writer.close();
            }
        } catch (FileNotFoundException ex) {
            System.out.println(file.toString() + "File not Found");
        } catch (IOException ex1) {
            System.out.println(file.toString() + "IO errors");
        }
    }

    /**
     * Path-based convenience overload of {@link #splitFile(File, File)}.
     * Does nothing when {@code source} is not an existing regular file.
     *
     * @param source      path of the input file
     * @param destination path of the output file
     */
    public void splitFile(String source, String destination) {
        File file = new File(source);
        File outfile = new File(destination);
        if (file.isFile()) {
            splitFile(file, outfile);
        }
    }

    /**
     * Reads all lines from {@code reader}, segments each non-empty line with
     * {@link #splitSentence(String)} and returns a new reader over the
     * result, with every line terminated by CR LF.
     *
     * @param reader source of the text to segment
     * @return a {@link StringReader} over the segmented text, or the original
     *         {@code reader} if reading failed (the failure is reported on
     *         standard output)
     */
    public Reader splitFile(Reader reader) {
        BufferedReader br = new BufferedReader(reader);
        StringBuffer stringb = new StringBuffer();

        try {
            String ts;
            while ((ts = br.readLine()) != null) {
                if (ts.length() != 0) {
                    stringb.append(splitSentence(ts));
                }
                stringb.append(LINE_END);
            }
            reader = new StringReader(stringb.toString());
        } catch (IOException ex) {
            // Previously swallowed silently; report it, keep the fallback return.
            System.out.println("IO errors: " + ex.getMessage());
        }
        return reader;
    }

    /**
     * Recursively segments every regular file under {@code sourceDir},
     * writing each result to the same relative path under
     * {@code destinationDir}. Missing destination directories are created.
     *
     * @param sourceDir      directory containing the input files
     * @param destinationDir directory receiving the output files
     */
    public void splitFiles(String sourceDir, String destinationDir) {
        File directory = new File(sourceDir);
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null for a non-directory or on an I/O error.
            System.out.println(sourceDir + " is not a readable directory");
            return;
        }

        File dirdes = new File(destinationDir);
        if (!dirdes.exists() || !dirdes.isDirectory()) {
            dirdes.mkdirs();
        }

        for (int i = 0; i < files.length; i++) {
            String name = files[i].getName();
            if (files[i].isFile()) {
                splitFile(files[i], new File(destinationDir + "/" + name));
            } else if (files[i].isDirectory()) {
                File tempdir = new File(destinationDir + "/" + name);
                if (!tempdir.exists() || !tempdir.isDirectory()) {
                    tempdir.mkdirs();
                }
                splitFiles(sourceDir + "/" + name, tempdir.getAbsolutePath());
            }
        }
    }

    /**
     * Small timing driver: segments the text given as the first command-line
     * argument (empty text when absent) and prints the start time, end time
     * and elapsed milliseconds.
     *
     * @param args optional; {@code args[0]} is the text to segment
     */
    public static void main(String[] args) {
        IRSplit split = IRSplit.getInstance();

        // The original referenced an undefined variable here.
        String ss = args.length > 0 ? args[0] : "";

        Date startdate = new Date();

        //split.splitFiles("clean", "out");
        split.splitLongSentence(ss);

        Date enddate = new Date();

        System.out.println(startdate);
        System.out.println(enddate);
        System.out.println(enddate.getTime() - startdate.getTime());
    }

}

 

 Leave a Reply

(required)

(required)


*

You may use these HTML tags and attributes: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong>

使用腾讯微博登陆

Protected by WP Anti Spam
   
© 2011 Information Retrieval Blog Suffusion theme by Sayontan Sinha