欢迎来到天天文库
浏览记录
ID:50827034
大小:33.09 KB
页数:4页
时间:2020-03-15
《JAVA爬虫获取网页URL源码.docx》由会员上传分享,免费在线阅读,更多相关内容在工程资料-天天文库。
1、packagefileAndStringOperate;importjava.io.BufferedReader;importjava.io.File;importjava.io.FileOutputStream;importjava.io.FileReader;importjava.io.FileWriter;importjava.io.IOException;importjava.io.OutputStream;publicclassReadFileToString{publicstaticvoidmain(String[]args)throwsIOException{Stringfil
2、eData=readtxt("D:\IR\1\JAVA网络爬虫.txt");CountRelatedDocument(fileData);//MyWebRec[]myWebRec=newMyWebRec[2000];ClassifyURL(fileData);}privatestaticStringreadtxt(StringfilePath)throwsIOException{BufferedReaderbr=newBufferedReader(newFileReader(filePath));Stringstr="";Stringr=br.readLine();while(r!=n
3、ull){str+=r+"";r=br.readLine();}returnstr;}privatestaticchar[][]readtxtToChar(StringfilePath)throwsIOException{char[][]data=null;intcurrentline=0;BufferedReaderbr=newBufferedReader(newFileReader(filePath));Stringstr="";Stringr=br.readLine();while(r!=null){str+=r+"";data[currentline++]=str.toCha
4、rArray();r=br.readLine();}returndata;}privatestaticintCountRelatedDocument(StringfileData){intcntRelated=0;intcurrentLine=1;intbaiduCnt=0;intbingCnt=0;intsogouCnt=0;int_360Cnt=0;intState=0;booleanbaiduY[]=newboolean[51];booleanbingY[]=newboolean[51];booleansogouY[]=newboolean[51];boolean_360Y[]=new
5、boolean[51];char[]fileChar=fileData.toCharArray();for(inti=0;i6、7、fileChar[i]=='y'){if(State==1){baiduCnt++;baiduY[c8、urrentLine/3]=true;}elseif(State==2){bingCnt++;bingY[currentLine/3-50]=true;//System.out.println(currentLine/3-50);}elseif(State==3){sogouCnt++;sogouY[currentLine/3-100]=true;}elseif(State==4){_360Cnt++;_360Y[currentLine/3-150]=true;}cntRelated++;}}}try{CaculatePR(baiduY,bingY,_360Y,sogouY,cntRelat9、ed);}catch(Exceptione){e.getMessage();}System.out.println("cntRelated="+cntRelated);System.out.println("baiduCnt="+baiduCnt);System.out.println("bingCnt="+bingCnt);System.out.println("sohuCnt="+sogouCnt);Sy
6、
7、fileChar[i]=='y'){if(State==1){baiduCnt++;baiduY[c
8、urrentLine/3]=true;}elseif(State==2){bingCnt++;bingY[currentLine/3-50]=true;//System.out.println(currentLine/3-50);}elseif(State==3){sogouCnt++;sogouY[currentLine/3-100]=true;}elseif(State==4){_360Cnt++;_360Y[currentLine/3-150]=true;}cntRelated++;}}}try{CaculatePR(baiduY,bingY,_360Y,sogouY,cntRelat
9、ed);}catch(Exceptione){e.getMessage();}System.out.println("cntRelated="+cntRelated);System.out.println("baiduCnt="+baiduCnt);System.out.println("bingCnt="+bingCnt);System.out.println("sohuCnt="+sogouCnt);Sy
此文档下载收益归作者所有