import java.io.BufferedInputStream;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.ProxyConfig;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.util.NameValuePair;
public class TestDynamicIpContinue {
/**
 * Shared list declared for the test; nothing in this file reads or writes it.
 * NOTE(review): the element type is presumably the "ip:port" strings returned by the proxy API - confirm.
 */
public static List<String> ipList = new ArrayList<>();
/**
 * Stop flag: set to {@code true} by the proxy-fetching thread and polled by {@code main}.
 * Declared {@code volatile} so a write made on one thread is guaranteed to become visible to the other.
 */
public static volatile boolean gameOver = false;
/**
 * Runs the dynamic-proxy test: prints the settings, starts one {@code GetIP} thread and waits
 * until that thread reports completion through {@code gameOver} (or terminates), then exits the JVM.
 *
 * @param args ignored; all settings are the local variables below
 * @throws Exception declared for compatibility with existing callers
 */
public static void main(String[] args) throws Exception {
    // Seconds to wait between two requests to the proxy API.
    long fetchIpSeconds = 5;
    // Handed to GetIP as its maxTime argument.
    int testTime = 3;
    // Order number appended to the proxy API URL by GetIP.
    String order = "88888888888888888888888888888";
    // Page every crawler thread tries to load.
    String targetUrl = "http://www.dianping.com/shop/6000000/";
    // Optional Referer header; an empty string means none is sent.
    String referer = "";
    // Forwarded to the crawler, which passes it to WebClientOptions.setUseInsecureSSL.
    boolean https = true;
    // When true the crawler prints every response header.
    boolean outputHeaderInfo = false;
    // Whether the crawler's WebClient executes JavaScript.
    boolean useJS = false;
    // WebClient timeout in milliseconds.
    int timeOut = 10000;

    if (order == null || order.isEmpty()) {
        System.err.println("请输入爬虫(动态)代理订单号");
        return;
    }

    System.out.println(">>>>>>>>>>>>>>动态IP测试开始<<<<<<<<<<<<<<");
    System.out.println("***************");
    System.out.println("提取IP间隔 " + fetchIpSeconds + " 秒 ");
    System.out.println("爬虫目标网址 " + targetUrl);
    System.out.println("***************\n");

    TestDynamicIpContinue tester = new TestDynamicIpContinue();
    Thread fetcher = new Thread(tester.new GetIP(fetchIpSeconds * 1000, testTime, order, targetUrl,
            useJS, timeOut, referer, https, outputHeaderInfo));
    fetcher.start();

    // Wait in 100 ms slices. Checking isAlive() as well as the flag prevents an endless wait
    // if the fetch thread ever dies without setting gameOver.
    while (!gameOver && fetcher.isAlive()) {
        try {
            fetcher.join(100);
        } catch (InterruptedException e) {
            // Restore the interrupt status and stop waiting instead of spinning on a swallowed interrupt.
            Thread.currentThread().interrupt();
            break;
        }
    }

    System.out.println(">>>>>>>>>>>>>>动态IP测试结束<<<<<<<<<<<<<<");
    // Crawler threads may still be running; exit explicitly so they do not keep the JVM alive.
    System.exit(0);
}
/**
 * Thread that loads one URL through one HTTP proxy with an HtmlUnit {@code WebClient} and prints
 * the page title together with the elapsed time.
 */
public class Crawler extends Thread {
    // Stored from the constructor; not read anywhere in this class.
    long sleepMs = 200;
    // Whether the WebClient executes JavaScript.
    boolean useJs = false;
    // URL loaded by run().
    String targetUrl = "";
    // WebClient timeout in milliseconds.
    int timeOut = 5000;
    // Proxy address; only the part before the first comma is used and it must look like host:port.
    String ipport = "";
    // Optional Referer header value; null or empty means no header is added.
    String referer;
    // Passed to WebClientOptions.setUseInsecureSSL.
    boolean https;
    // When true, every response header is printed.
    boolean outputHeaderInfo;

    /**
     * @param sleepMs      stored but unused by this class
     * @param targetUrl    URL to load when the thread runs
     * @param useJs        enable JavaScript execution in the WebClient
     * @param timeOut      WebClient timeout in milliseconds
     * @param ipport       proxy address ({@code host:port}, optionally followed by {@code ,...});
     *                     {@code null} makes {@link #webParseHtml(String)} skip the request
     * @param referer      Referer header value, or null/empty for none
     * @param https        value passed to {@code setUseInsecureSSL}
     * @param outputHeader print response headers when true
     */
    public Crawler(long sleepMs, String targetUrl, boolean useJs, int timeOut, String ipport,
            String referer, boolean https, boolean outputHeader) {
        this.sleepMs = sleepMs;
        this.targetUrl = targetUrl;
        this.useJs = useJs;
        this.timeOut = timeOut;
        this.ipport = ipport;
        this.referer = referer;
        this.https = https;
        this.outputHeaderInfo = outputHeader;
    }

    /** Loads {@code targetUrl}; the returned markup is discarded. */
    @Override
    public void run() {
        webParseHtml(targetUrl);
    }

    /**
     * Fetches {@code url} through the configured proxy with a randomly chosen browser profile.
     *
     * @param url page to load
     * @return the body for an {@code application/json} response, the XML serialization for an
     *         HTML page, otherwise an empty string (also when no proxy is set or the request fails)
     */
    public String webParseHtml(String url) {
        // Without a proxy there is nothing to test: print the marker and skip building a client.
        if (ipport == null) {
            System.out.print(".");
            return "";
        }

        String html = "";
        BrowserVersion[] versions = { BrowserVersion.CHROME, BrowserVersion.FIREFOX_38,
                BrowserVersion.INTERNET_EXPLORER_11, BrowserVersion.INTERNET_EXPLORER_8 };
        WebClient client = new WebClient(versions[(int) (versions.length * Math.random())]);
        try {
            client.getOptions().setThrowExceptionOnFailingStatusCode(false);
            client.getOptions().setJavaScriptEnabled(useJs);
            client.getOptions().setCssEnabled(false);
            client.getOptions().setThrowExceptionOnScriptError(false);
            client.getOptions().setTimeout(timeOut);
            client.getOptions().setAppletEnabled(true);
            client.getOptions().setGeolocationEnabled(true);
            client.getOptions().setRedirectEnabled(true);
            client.getOptions().setUseInsecureSSL(https);
            if (referer != null && !referer.isEmpty()) {
                client.addRequestHeader("Referer", referer);
            }

            // Split once and trim: a trailing '\r' or space would otherwise break the port parse.
            // A malformed address still ends up in the catch block below.
            String[] hostAndPort = ipport.split(",")[0].trim().split(":");
            ProxyConfig proxyConfig =
                    new ProxyConfig(hostAndPort[0].trim(), Integer.parseInt(hostAndPort[1].trim()));
            client.getOptions().setProxyConfig(proxyConfig);

            long startMs = System.currentTimeMillis();
            Page page = client.getPage(url);
            WebResponse response = page.getWebResponse();
            if (outputHeaderInfo) {
                List<NameValuePair> headers = response.getResponseHeaders();
                for (NameValuePair header : headers) {
                    System.out.println(header.getName() + "-->" + header.getValue());
                }
            }
            // Constant-first comparison so a missing content type cannot cause a NullPointerException.
            if ("application/json".equals(response.getContentType())) {
                html = response.getContentAsString();
            } else if (page.isHtmlPage()) {
                html = ((HtmlPage) page).asXml();
            }
            long endMs = System.currentTimeMillis();

            Document doc = Jsoup.parse(html);
            System.out.println(getName() + " " + ipport + " 用时 " + (endMs - startMs) + "毫秒 :"
                    + doc.select("title").text());
        } catch (Exception e) {
            // Best effort: one failing proxy is reported and must not affect other crawler threads.
            System.err.println(ipport + ":" + e.getMessage());
        } finally {
            client.close();
        }
        return html;
    }
}
/**
 * Runnable that calls the proxy API up to {@code maxTime} times, starts one {@code Crawler}
 * thread per returned address, pauses {@code sleepMs} between calls and finally sets
 * {@code gameOver} to signal that the test is finished.
 */
public class GetIP implements Runnable {
    // Pause after each API call, in milliseconds.
    long sleepMs = 1000;
    // Maximum number of API calls before the test is declared over.
    int maxTime = 3;
    // Order number sent to the proxy API.
    String order = "";
    // The remaining fields are passed through unchanged to every Crawler.
    String targetUrl;
    boolean useJs;
    int timeOut;
    String referer;
    boolean https;
    boolean outputHeaderInfo;

    /**
     * @param sleepMs          pause after each API call, in milliseconds
     * @param maxTime          number of API calls to make
     * @param order            order number used in the API URL
     * @param targetUrl        URL each crawler loads
     * @param useJs            crawler JavaScript switch
     * @param timeOut          crawler timeout in milliseconds
     * @param referer          crawler Referer header value
     * @param https            crawler insecure-SSL switch
     * @param outputHeaderInfo crawler header-dump switch
     */
    public GetIP(long sleepMs, int maxTime, String order, String targetUrl, boolean useJs,
            int timeOut, String referer, boolean https, boolean outputHeaderInfo) {
        this.sleepMs = sleepMs;
        this.maxTime = maxTime;
        this.order = order;
        this.targetUrl = targetUrl;
        this.useJs = useJs;
        this.timeOut = timeOut;
        this.referer = referer;
        this.https = https;
        this.outputHeaderInfo = outputHeaderInfo;
    }

    /**
     * Fetch/dispatch loop. Every round counts, whether the API call succeeded or not, so the
     * loop is guaranteed to end after {@code maxTime} rounds.
     */
    @Override
    public void run() {
        for (int round = 1; round <= maxTime && !gameOver; round++) {
            try {
                // Keep only non-blank lines; trimming also drops a '\r' left by CRLF line endings.
                List<String> proxies = new ArrayList<>();
                for (String line : fetchProxyList().split("\n")) {
                    String candidate = line.trim();
                    if (!candidate.isEmpty()) {
                        proxies.add(candidate);
                    }
                }
                System.out.println(">>>>>>>>>>>>>>当前返回IP量 " + proxies.size());
                for (String ip : proxies) {
                    new Crawler(100, targetUrl, useJs, timeOut, ip, referer, https, outputHeaderInfo).start();
                }
            } catch (Exception e) {
                // A failed API call is reported and the next round is still attempted.
                System.err.println(">>>>>>>>>>>>>>获取IP出错, " + e.getMessage());
            }
            try {
                // Also runs after the last round, which gives the started crawlers time to work.
                Thread.sleep(sleepMs);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
                break;
            }
        }
        gameOver = true;
    }

    /**
     * Downloads the complete response body of the proxy API as UTF-8 text.
     * Reads until end of stream, since {@code available()} only reports bytes that are already buffered.
     */
    private String fetchProxyList() throws java.io.IOException {
        java.net.URL url = new java.net.URL(
                "http://api.ip.data5u.com/dynamic/get.html?order=" + order + "&ttl&random=true");
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        // Applied to the connection that is actually used for the request.
        connection.setConnectTimeout(3000);
        try (InputStream in = new BufferedInputStream(connection.getInputStream())) {
            java.io.ByteArrayOutputStream collected = new java.io.ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int count;
            while ((count = in.read(chunk)) != -1) {
                collected.write(chunk, 0, count);
            }
            return collected.toString("UTF-8");
        }
    }
}
/**
 * Concatenates the given strings, appending a comma after every element, so a non-empty
 * result always ends with a trailing comma (for example {@code "a,b,"}).
 *
 * @param list strings to join; {@code null} is treated like an empty list
 * @return the joined text, or an empty string when there are no elements
 */
public String joinList(List<String> list) {
    StringBuilder re = new StringBuilder();
    if (list == null) {
        return re.toString();
    }
    for (String item : list) {
        re.append(item).append(",");
    }
    return re.toString();
}
/**
 * Removes every space character and every line feed from the given text, not only those at
 * the ends. All other characters, including tabs and carriage returns, are kept.
 *
 * @param html text to strip; may be {@code null}
 * @return the stripped text, or {@code null} when the input is {@code null}
 */
public String trim(String html) {
    if (html == null) {
        return null;
    }
    StringBuilder kept = new StringBuilder(html.length());
    for (int i = 0; i < html.length(); i++) {
        char c = html.charAt(i);
        if (c != ' ' && c != '\n') {
            kept.append(c);
        }
    }
    return kept.toString();
}
}
// 无忧代理IP(www.data5u.com)原创文章,转载请注明出处。 (Original article by data5u.com; please credit the source when reposting.)