做网站设计和推广,下载字体安装到wordpress,wordpress主题汉化是什么,手工制作风筝
工作任务：
今天老大让我爬取一个新闻网站：https://www.yidaiyilu.gov.cn/
踩坑记录：
https协议，如果利用http协议去请求会报出如下信息：
错误：SSLHandshake。看到这个错误就知道了，客户端与服务端进行连…
工作任务：
今天老大让我爬取一个新闻网站：https://www.yidaiyilu.gov.cn/
踩坑记录：
https协议：如果利用http协议去请求，会报出如下信息：
错误：SSLHandshake。看到这个错误就知道了，客户端与服务端进行连接时需要通过SSL协议进行握手。
（坑）改为重写DefaultHttpClient，使其支持SSL协议：
package httpsParse;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import org.apache.http.conn.ClientConnectionManager;
import org.apache.http.conn.scheme.Scheme;
import org.apache.http.conn.scheme.SchemeRegistry;
import org.apache.http.conn.ssl.SSLSocketFactory;
import org.apache.http.impl.client.DefaultHttpClient;
//用于进行Https请求的HttpClient
public class SSLClient extends DefaultHttpClient{ public SSLClient() throws Exception{ super();
//传输协议需要根据自己的判断 SSLContext ctx SSLContext.getInstance(TLSv1.2); X509TrustManager tm new X509TrustManager() { Override public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException { } Override public void checkServerTrusted(X509Certificate[] chain, String authType) throws CertificateException { } Override public X509Certificate[] getAcceptedIssuers() { return null; } }; ctx.init(null, new TrustManager[]{tm}, null); SSLSocketFactory ssf new SSLSocketFactory(ctx,SSLSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER); ClientConnectionManager ccm this.getConnectionManager(); SchemeRegistry sr ccm.getSchemeRegistry(); sr.register(new Scheme(https, 443, ssf)); }
}
（坑）然后再利用HttpClient去请求，获取网页源代码：
public static void main(String[] args) throws Exception {HttpClientUtil httpClientUtil new HttpClientUtil();String url https://www.yidaiyilu.gov.cn/zchj.htm;String html httpClientUtil.doGet(url);System.out.println(html);}最后发现得到的结果是一段js代码
scriptvar xcatchdtoStringString36pathnameiftoLowerCasevar855captchaArray1for1500documentcharsattachEventaddEventListenersubstrExpiresfalsef0fromCharCodeinnerHTML8splitparseIntcreateElementgnew16searchMayhttpsreverseRegExpwhilecharCodeAtrOm9XFMtA3QKV7nYsPGT4lifyWwkq5vcjH2IdxUoCbhERLaz81DNB610JgSe0upZelsematch0xFF07lengtheeval19PathadivsetTimeoutcookie350xEDB88320GMTchallengeTuewindowhrefreturntrylocationonreadystatechangefunction1557242170DOMContentLoadedfirstChildreplace__jsl_clearancecharAtjoin.replace(/*$/,).split(),yg 3b3q(){31(3o.3h3o.c3o.1s.40(/[\\?|]i-39/,\\\\),q);s.32413r.h|19|(3q(){g 1i[3q(3b){3i 2n(9.1a(3b))},(3q(){g 3bs.1o(30);3b.1b2u 3h\\/\\3l/2u;3b3b.3u.3h;g 1i3b.2f(/20?:\\/\\//)[19];3b3b.14(1i.2k).f();3i 3q(1i){p(g 3l19;3l1i.2k;3l){1i[3l]3b.42(1i[3l])};3i 1i.43()}})()],3l[[([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[])[-~~~!{}[~~[]]-(-~~~!{})],(-~{}[][[]][19])[~~][-~(!{})],[34](-~[-~{}-~{}][[]][19]),[-~{}-~[-~{}-~{}]](((-~[]-~[])(-~[]-~[]))[[]][19]),(-~{}[][[]][19])(-~{}[][[]][19])[-~(!{})],(-~{}[][[]][19])(-~{}[][[]][19])[-~{}-~[-~{}-~{}]],[33-~(!{})-~(!{})](-~[-~{}-~{}][[]][19]),[34][-~(!{})],[-~~~!{}[~~[]]-(-~~~!{})](((-~[]-~[])(-~[]-~[]))[[]][19]),[33-~(!{})-~(!{})](-~[-~{}-~{}][[]][19]),(-~{}[][[]][19])[~~][33-~(!{})-~(!{})]],[(-~{}[][[]][19])(((-~[]-~[])(-~[]-~[]))[[]][19]),[33-~(!{})-~(!{})]],[[34][-~{}-~[-~{}-~{}]],(-~[-~{}-~{}][[]][19])[33-~(!{})-~(!{})],[34][~~],([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[])([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[]),([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[])(((-~[]-~[])(-~[]-~[]))[[]][19]),(-~{}[][[]][19])(-~{}[][[]][19])[34]],[(-~{}[][[]][19])[-~(!{})],([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[])],[[34](-~{}[][[]][19]),(((-~[]-~[])(-~[]-~[]))[[]][19])[~~],[34][34],[34]([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[]),[-~{}-~[-~{}-~{}]](((-~[]-~[])(-~[]-~[]))[[]][19])],[(-~{}[][[]][19])(((-~[]-~[])(-~[]-~[]))[[]][19]),(-~{}[][[]][19])[-~(!{})]],[([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[
])[-~~~!{}[~~[]]-(-~~~!{})],(-~[-~{}-~{}][[]][19])[33-~(!{})-~(!{})],[34](-~{}[][[]][19]),([(-~[]-~[])]*(((!{})[(-~[]-~[])](-~[]-~[])))[])(((-~[]-~[])(-~[]-~[]))[[]][19])]];p(g 3b19;3b3l.2k;3b){3l[3b]1i.22()[(-~{}[][[]][19])](3l[3b])};3i 3l.43()})();153c, 2j-1t-2q 1r:1r:2c 38;2t/;};d((3q(){3j{3i !!3f.13;}2(2m){3i 17;}})()){s.13(3s,3b,17)}2e{s.12(3p,3b)},ffunction(x,y){var a0,b0,c0;xx.split();yy||99;while((ax.shift())(ba.charCodeAt(0)-77.5))c(Math.abs(b)13?(b48.5):parseInt(a,36))y*c;return c},zf(y.match(/\w/g).sort(function(x,y){return f(x)-f(y)}).pop());while(z)try{eval(y.replace(/\b\w\b/g, function(y){return x[f(y,z)-1]||(_y)}));break}catch(_){}/script
开始怀疑是cookie的原因，然后在浏览器中将cookie带上去，最后请求出了结果。但是cookie是有有效期的，隔一段时间cookie就失效了，因此这种方法行不通。后来分析发现：浏览器访问该网站时，首先会加载js，然后生成cookie，再将这次生成的cookie带上请求头再次请求，这就是为什么上面的第一次请求会返回js代码。但是js是动态加载的，因此需要利用java模拟浏览器的方式去实现。最终通过htmlunit实现的代码：
package cn.server;import org.openqa.selenium.htmlunit.HtmlUnitDriver;public class GFDynamicWeb {public static HtmlUnitDriver driver new HtmlUnitDriver();public static boolean isGetCookie false;
// public static boolean isRepeatExec false;public static String GetContent(String url) {if(!isGetCookie) {driver.setJavascriptEnabled(true);//第一次加载js获取cookiedriver.get(url);}driver.setJavascriptEnabled(false);//第二次加载网页源码driver.get(url);String pageSource driver.getPageSource();isGetCookie true;return pageSource;}public static void renewIsGetCookie() {isGetCookie false;}public static void closeDriver() {driver.close();}public static void main(String[] args) {long s System.currentTimeMillis();for(int i 0; i 100; i ) {String url https://www.yidaiyilu.gov.cn/;String content GetContent(url);System.out.println(content);}long e System.currentTimeMillis();System.out.println((e - s)/1000 秒);renewIsGetCookie();closeDriver();}
}
期间参考的网址：
在线接口测试
521状态码作用
521出错问题解决办法