大学IT网 - 最懂大学生的IT学习网站! QQ资料交流群:367606806
当前位置:大学IT网 > Java技巧 > 如何用Java获取网页源代码

如何用Java获取网页源代码

关键词:Java、如何用Java获取网页源代码  阅读(640) 赞(16)

[摘要]本文主要是对如何用Java获取网页源代码的讲解,希望对大家学习如何用Java获取网页源代码有所帮助。

  import java.io.BufferedReader;

  import java.io.IOException;

  import java.io.InputStream;

  import java.io.InputStreamReader;

  import java.net.HttpURLConnection;

  import java.net.URL;

  /**
   * Small static helper for downloading the source of a web page over HTTP(S).
   *
   * <p>Failures are reported on standard output and signalled to the caller with a
   * {@code null} return value instead of an exception.
   */
  public class HtmlParser {

    /** User-Agent header sent with every request so the client presents itself as a browser. */
    private static final String USER_AGENT =
        "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)";

    /** Connect and read timeout, in milliseconds (60 seconds). */
    private static final int TIMEOUT_MILLIS = 60000;

    /**
     * Downloads the body of {@code url} and decodes it with the given charset.
     *
     * <p>The body is read line by line and the lines are concatenated <em>without</em> their
     * line terminators, so the returned text contains no line breaks.
     *
     * @param url    the HTTP or HTTPS address to fetch
     * @param encode name of the charset used to decode the response body, e.g. {@code "UTF-8"}
     * @return the page content, or {@code null} if an argument is {@code null}, the URL is not
     *         an HTTP(S) URL, no valid HTTP response was received, the status code is 400 or
     *         higher, the charset is unsupported, or any other I/O error occurs
     */
    public static String getHtmlContent(URL url, String encode) {
      if (url == null || encode == null) {
        return null;
      }

      HttpURLConnection con = null;
      try {
        // Non-HTTP protocols (file:, ftp:, ...) yield a different connection type; reject them
        // instead of failing with a ClassCastException.
        Object opened = url.openConnection();
        if (!(opened instanceof HttpURLConnection)) {
          System.out.println("error: " + url.toString());
          return null;
        }
        con = (HttpURLConnection) opened;
        con.setRequestProperty("User-Agent", USER_AGENT);
        con.setConnectTimeout(TIMEOUT_MILLIS);
        con.setReadTimeout(TIMEOUT_MILLIS);

        // getResponseCode() reports -1 when the reply is not valid HTTP.
        int responseCode = con.getResponseCode();
        if (responseCode == -1) {
          System.out.println(url.toString() + " : connection is failure...");
          return null;
        }
        if (responseCode >= 400) { // client or server error
          System.out.println("请求失败:get response code: " + responseCode);
          return null;
        }

        StringBuilder content = new StringBuilder();
        // try-with-resources closes the reader and the stream on every path, including failures.
        try (InputStream in = con.getInputStream();
            BufferedReader reader = new BufferedReader(new InputStreamReader(in, encode))) {
          String line;
          while ((line = reader.readLine()) != null) {
            content.append(line);
          }
        }
        return content.toString();
      } catch (IOException e) {
        e.printStackTrace();
        System.out.println("error: " + url.toString());
        return null;
      } finally {
        // con is still null when openConnection() itself failed.
        if (con != null) {
          con.disconnect();
        }
      }
    }

    /**
     * Convenience overload taking the address as text.
     *
     * <p>If the address does not already start with {@code http://} or {@code https://}
     * (compared case-insensitively), {@code http://} is prepended before it is parsed.
     *
     * @param url    the address to fetch, with or without a scheme
     * @param encode name of the charset used to decode the response body
     * @return the page content, or {@code null} if {@code url} is {@code null} or blank, cannot
     *         be parsed, or the download fails
     */
    public static String getHtmlContent(String url, String encode) {
      if (url == null) {
        return null;
      }
      String address = url.trim();
      if (address.isEmpty()) {
        return null;
      }

      boolean hasScheme = address.regionMatches(true, 0, "http://", 0, 7)
          || address.regionMatches(true, 0, "https://", 0, 8);
      if (!hasScheme) {
        address = "http://" + address;
      }

      try {
        return getHtmlContent(new URL(address), encode);
      } catch (Exception e) {
        // Boundary catch kept on purpose: this overload has always reported every failure,
        // including unexpected runtime errors, as a null result.
        e.printStackTrace();
        return null;
      }
    }

    /**
     * Command-line entry point: prints the source of the page given as the first argument.
     *
     * @param argsp {@code argsp[0]} is the address to fetch; the optional {@code argsp[1]} is
     *              the charset name and defaults to {@code UTF-8}
     */
    public static void main(String[] argsp) {
      if (argsp.length == 0) {
        System.out.println("usage: java HtmlParser <url> [encoding]");
        return;
      }
      String encode = argsp.length > 1 ? argsp[1] : "UTF-8";
      System.out.println(getHtmlContent(argsp[0], encode));
    }

  }



相关评论