- 4
- 0
- 约8.94千字
- 约 46页
- 2019-04-06 发布于江西
- 举报
// Search engines (Lucene, Nutch), example 1: simple code that grabs every
// e-mail address found on a single page (Java network programming and
// regular expressions).
package TestWWW;

import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads one web page line by line and prints every e-mail address that
 * appears in it.
 */
public class ConnectionTest {

    /**
     * Page fetched when no URL is given on the command line.
     * NOTE(review): the original URL literal was lost when this listing was
     * extracted; this value is inferred from the variable name {@code sina}
     * and must be confirmed.
     */
    private static final String DEFAULT_URL = "http://www.sina.com.cn";

    /**
     * E-mail matcher: word characters, dots or hyphens, then {@code @}, then
     * the same character set, a literal dot and a final run of word characters.
     * Compiled once instead of once per input line.
     */
    private static final Pattern EMAIL_PATTERN =
            Pattern.compile("[\\w[.-]]+@[\\w[.-]]+\\.[\\w]+");

    /**
     * Fetches the page and feeds each line to {@link #parse(String)}.
     *
     * @param args optional; {@code args[0]} overrides the default page URL
     */
    public static void main(String[] args) {
        String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        try {
            URL sina = new URL(address);
            URLConnection sinaConnection = sina.openConnection();
            // The reader decodes with the platform default charset, as the
            // original listing did.
            BufferedReader br = new BufferedReader(
                    new InputStreamReader(sinaConnection.getInputStream()));
            try {
                String str;
                while ((str = br.readLine()) != null) {
                    parse(str);
                }
                System.out.println("测试结束");
            } finally {
                // Close the reader even when reading or parsing fails.
                br.close();
            }
        } catch (MalformedURLException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints every e-mail address found in one line of text, one per output line.
     *
     * @param str a single line of page content
     */
    private static void parse(String str) {
        Matcher m = EMAIL_PATTERN.matcher(str);
        // find() resumes after the previous match, so each address on the
        // line is reported once.
        while (m.find()) {
            System.out.println(m.group());
        }
    }
}
原创力文档

文档评论(0)