大学IT网 - 最懂大学生的IT学习网站! QQ资料交流群:367606806
当前位置:大学IT网 > Java技巧 > java HttpClient+Jsoup打造灌水利器

java HttpClient+Jsoup打造灌水利器

关键词:HttpClientJsoupjavaHttpClie  阅读(753) 赞(10)

[摘要]本文主要是对java HttpClient+Jsoup打造灌水利器的讲解,希望对大家学习java HttpClient+Jsoup打造灌水利器有所帮助。

    很久以前就想过写个自动回帖的小软件,但一直没有实现。最近闲下来了,遂研究了一下。本人小菜,对于HTTP协议一知半解,只能再请教google大神了。把我的想法跟google大神说了之后,google大神说这小子不错,这是为防火事业做贡献啊!特赐予小弟以下神器:
    1、HttpClient 4.3.1 (GA)
    以下列出的是 HttpClient 提供的主要的功能,要知道更多详细的功能可以参见 HttpClient 的主页。
    实现了所有 HTTP 的方法(GET,POST,PUT,HEAD 等)
    支持自动转向
    支持 HTTPS 协议
    支持代理服务器等
    2、Jsoup
    jsoup 的主要功能如下
    从一个 URL,文件或字符串中解析 HTML
    使用 DOM 或 CSS 选择器来查找、取出数据
    可操作 HTML 元素、属性、文本
    使用与jquery几乎一样的语法
    废话不多说,直接进入正题。HTTPClient源码包内包含example文件夹,此文件夹内有一些基本用法的例子,入门足够了。找到ClientFormLogin.java,具体解释注释里已经很清楚了,大致意思就是模拟HTTP请求并存储cookies。
    因为此网站对登录做了特殊处理,所以与标准的DZ论坛可能会有些出入,请自行修改。
    对网站的分析使用的chrome自带的审查元素,这个折腾了不少时间
    登录地址:http://passport.dakele.com/login.do?product=bbs
    输入错误的用户名和密码会发现实际登录地址为http://passport.dakele.com/logon.do 注意【login与logon中i/o的区别,刚开始没注意,以为见鬼了】
    返回错误信息
    {"err_msg":"帐号或密码错误"}
    输入正确信息返回
    {"result":true,"redirect":http://bbs.dakele.com/member.php?mod=logging&action=login&loginsubmit=yes&infloat=yes&lssubmit=yes&inajax=0&fastloginfield=username&quickforward=yes&handlekey=ls&cookietime=2592000&remember=0&username=youname&AccessKey=[]}
    直接访问redirect链接,效果和正常登录一样
    获取跳转链接:
   
    private LoginResult getRedirectUrl(){
    LoginResult loginResult = null;
    CloseableHttpClient httpClient = HttpClients.createDefault();
    HttpPost httpost = new HttpPost(LOGINURL);
    httpost.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    httpost.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
    httpost.setHeader("Cache-Control", "max-age=0");
    httpost.setHeader("Connection", "keep-alive");
    httpost.setHeader("Host", "passport.dakele.com");
    httpost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    List <NameValuePair> nvps = new ArrayList <NameValuePair>();
    nvps.add(new BasicNameValuePair("product", "bbs"));
    nvps.add(new BasicNameValuePair("surl", "http://bbs.dakele.com/"));
    nvps.add(new BasicNameValuePair("username", "yourname"));//用户名
    nvps.add(new BasicNameValuePair("password", "yourpass"));//密码
    nvps.add(new BasicNameValuePair("remember", "0"));
    httpost.setEntity(new UrlEncodedFormEntity(nvps, Consts.UTF_8));
    CloseableHttpResponse response2 = null;
    try {
    response2 = httpClient.execute(httpost);
    if(response2.getStatusLine()。getStatusCode()==200){
    HttpEntity entity = response2.getEntity();
    String entityString = EntityUtils.toString(entity);
    JSONArray jsonArray = JSONArray.fromObject("["+entityString+"]");
    JsonConfig jsonConfig=new JsonConfig();
    jsonConfig.setArrayMode(JsonConfig.MODE_OBJECT_ARRAY);
    jsonConfig.setRootClass(LoginResult.class);
    LoginResult[] results= (LoginResult[]) JSONSerializer.toJava( jsonArray, jsonConfig );
    if(results.length==1){
    loginResult = results[0];
    }
    }
    } catch (ClientProtocolException e) {
    e.printStackTrace();
    } catch (IOException e) {
    e.printStackTrace();
    }finally{
    try {
    response2.close();
    httpClient.close();
    } catch (IOException e) {
    e.printStackTrace();
    }
    }
    return loginResult;
    }
   


    登录代码:
   
    public boolean login(){
    boolean flag = false;
    LoginResult loginResult = getRedirectUrl();
    if(loginResult.getResult()。equals("true")){
    cookieStore = new BasicCookieStore();
    globalClient = HttpClients.custom()。setDefaultCookieStore(cookieStore)。build();
    HttpGet httpGet = new HttpGet(loginResult.getRedirect());
    httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
    httpGet.setHeader("Connection", "keep-alive");
    httpGet.setHeader("Host", HOST);
    httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    try {
    globalClient.execute(httpGet);
    } catch (ClientProtocolException e) {
    e.printStackTrace();
    } catch (IOException e) {
    e.printStackTrace();
    }
    List<Cookie> cookies2 = cookieStore.getCookies();
    if (cookies2.isEmpty()) {
    log.error("cookie is empty");
    } else {
    for (int i = 0; i < cookies2.size(); i++) {
    }
    }
    }
    return flag;
    }
   
    到此已经登录成功,可以进行只有登录后才能做的事了。什么?你不知道?当然是灭火了
    首先取得需要回复的帖子地址。列表页比较有规律,所以没有写自动发现,只写了个循环
   
    for(int i=1;i<200;i++){
    String basurl="http://bbs.dakele.com/forum-43-"+i+".html";
    log.info(basurl);
    List<String> urls = dakele.getThreadURLs(basurl);
    for(String url:urls){
    //log.info(url);
    ReplayContent content = dakele.preReplay(url);
    if(content!=null){
    log.info(content.getUrl());
    log.info(content.getMessage());
    //dakele.replay( content);
    //Thread.sleep(15300);
    }
    }
    }
   
    在列表页内获取帖子地址:
   
    String html = EntityUtils.toString(entity);
    Document document = Jsoup.parse(html,HOST);
    Elements elements=document.select("tbody[id^=normalthread_] > tr > td.new > a.xst");
    for(int i=0;i<elements.size();i++){
    Element e = elements.get(i);
    urList.add(e.attr("abs:href"));
    }
   
    在需要回复的帖子内获得需要提交的form表单地址以及构造回复内容
   
    public ReplayContent preReplay(String url){
    ReplayContent content = null;
    HttpGet get  = new HttpGet(url);
    get.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    get.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
    get.setHeader("Connection", "keep-alive");
    get.setHeader("Host", HOST);
    get.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    try {
    CloseableHttpResponse response = globalClient.execute(get);
    HttpEntity entity = response.getEntity();
    String html = EntityUtils.toString(entity);
    Document document = Jsoup.parse(html, HOST);
    Element postForm = document.getElementById("fastpostform");
    if(!postForm.toString()。contains("您现在无权发帖")){
    content = new ReplayContent();
    content.setUrl(url);
    log.debug(postForm.attr("abs:action"));
    content.setAction(postForm.attr("abs:action"));
    ////////
    Elements teElements = document.select("td[id^=postmessage_]");
    String message = "";
    for(int i=0;i<teElements.size();i++){
    String temp = teElements.get(i)。html()。replaceAll( "(?is)<.*?>", "");
    if(temp.contains("发表于")){
    String[] me = temp.split("\\s+");
    temp = me[me.length-1];
    }
    message+=temp.replaceAll("\\s+", "");
    }
    log.debug(message.replaceAll("\\s+", ""));
    ///////////////
    /*取最后一条评论
    Element messageElement= document.select("td[id^=postmessage_]")。last();
    //                String message = messageElement.html()。replaceAll("\\&[a-zA-Z]{1,10};", "")。replaceAll("<[^>]*>", "")。replaceAll("[(/>)<]", "");
    String message = messageElement.html()。replaceAll( "(?is)<.*?>", "");
    */
    if(message.contains("发表于")){
    String[] me = message.split("\\s+");
    message = me[me.length-1];
    }
    content.setMessage(message.replaceAll("&nbsp;", "")。replaceAll("上传", "")。replaceAll("附件", "")。replaceAll("下载", ""));
    Elements inputs = postForm.getElementsByTag("input");
    for(Element input:inputs){
    log.debug(input.attr("name")+":"+input.attr("value"));
    if(input.attr("name")。equals("posttime")){
    content.setPosttime(input.attr("value"));
    }else if(input.attr("name")。equals("formhash")){
    content.setFormhash(input.attr("value"));
    }else if(input.attr("name")。equals("usesig")){
    content.setUsesig(input.attr("value"));
    }else if(input.attr("name")。equals("subject")){
    content.setSubject(input.attr("value"));
    }
    }
    }else{
    log.warn("您现在无权发帖:"+url);
    }
    } catch (ClientProtocolException e) {
    e.printStackTrace();
    } catch (IOException e) {
    e.printStackTrace();
    }
    return content;
    }
   
    地址有了,内容有了接下来开始放水了
   
    public void replay(ReplayContent content){
    HttpPost httpost = new HttpPost(content.getAction());
    httpost.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
    httpost.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
    httpost.setHeader("Cache-Control", "max-age=0");
    httpost.setHeader("Connection", "keep-alive");
    httpost.setHeader("Host", HOST);
    httpost.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36");
    List <NameValuePair> nvps = new ArrayList <NameValuePair>();
    nvps.add(new BasicNameValuePair("posttime", content.getPosttime()));
    nvps.add(new BasicNameValuePair("formhash", content.getFormhash()));
    nvps.add(new BasicNameValuePair("usesig", content.getUsesig()));
    nvps.add(new BasicNameValuePair("subject", content.getSubject()));
    nvps.add(new BasicNameValuePair("message", content.getMessage()));
    httpost.setEntity(new UrlEncodedFormEntity(nvps, Consts.UTF_8));
    //HTTP 三次握手 必须处理响应刚开始没注意卡在这了
    CloseableHttpResponse response2 = null;
    try {
    response2 = globalClient.execute(httpost);
    //log.info(content.getAction());
    //log.info(content.getMessage());
    HttpEntity entity = response2.getEntity();
    EntityUtils.consume(entity);
    //            BufferedWriter bw= new BufferedWriter(new FileWriter("d:/tt1.html"));
    //            bw.write(EntityUtils.toString(response2.getEntity()));
    //            bw.flush();
    //            bw.close();
    //System.out.println(EntityUtils.toString(response2.getEntity()));
    } catch (ClientProtocolException e) {
    e.printStackTrace();
    } catch (IOException e) {
    e.printStackTrace();
    }
    }
   



相关评论