package com.f8fm.spider;
import org.apache.commons.lang3.builder.ToStringBuilder;
import org.nutz.dao.Dao;
import org.nutz.dao.entity.annotation.Table;
import org.nutz.dao.impl.NutDao;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Task;
import us.codecraft.webmagic.model.AfterExtractor;
import us.codecraft.webmagic.model.OOSpider;
import us.codecraft.webmagic.model.annotation.ExtractBy;
import us.codecraft.webmagic.model.annotation.HelpUrl;
import us.codecraft.webmagic.model.annotation.TargetUrl;
import us.codecraft.webmagic.pipeline.PageModelPipeline;
import com.alibaba.druid.pool.DruidDataSource;
/**
* @author code4crafter@gmail.com
*/
@TargetUrl("http://www.jokeji.cn/jokehtml/jy/\d+.htm")
@HelpUrl("http://www.jokeji.cn/list\w+.htm")
@Table("sp_news")
public class JokejiModel implements AfterExtractor {
public String toString() {
return title.toString();
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
@ExtractBy("//title/regex('<title>([^_]+)',1)")
private String title;
@ExtractBy("//span[@id=text110]/tidyText()")
private String content;
public static DruidDataSource ds;
public static Dao dao;
public static void main(String[] args) {
/*driverClassName : "com.mysql.jdbc.Driver",
url : "jdbc:mysql://192.168.0.253:3306/youfang?useUnicode=true&characterEncoding=utf-8",
username : "root",
password : ""*/
ds=new DruidDataSource();
ds.setDriverClassName( "com.mysql.jdbc.Driver");
ds.setUrl( "jdbc:mysql://192.168.0.253:3306/youfang?useUnicode=true&characterEncoding=utf-8");
ds.setUsername("root");
ds.setPassword("");
dao= new NutDao(ds);
OOSpider.create(Site.me().setDomain("www.jokeji.cn").setCharset("gbk").setSleepTime(100).setTimeOut(3000).setUserAgent("Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)")
, new PageModelPipeline<Object>() {
public void process(Object o, Task task) {
System.out.println(o);
}
}, JokejiModel.class)
.addUrl("http://www.jokeji.cn/")
.thread(2)
.run();
ds.close();
}
public void afterProcess(Page page) {
dao.insert(this);
System.out.println(page.getUrl());
}
public class ConsolePageModelPipeline implements PageModelPipeline {
public void process(Object o, Task task) {
System.out.println(ToStringBuilder.reflectionToString(o));
}
}
}