读书人

jsoup java 抓取百度MP3 top500 -冰之

发布时间: 2012-07-18 12:05:40 作者: rapoo

jsoup java 抓取百度MP3 top500 ----冰之龙代码
冰之龙原创,冰之龙代码。因为正则表达式不支持中文,所以在 MP3 下载列表中没有包含中文链接。要包含中文链接也很容易:自己抓取 http MP3,用字符串查找即可。jsoup 下载地址为 http://jsoup.org/download 。代码:
System.out.println("网址:" + uat.myURL);
System.out.println();
}
System.out.println("共有" + i + "个符合结果");
}

/**
 * Rebuilds {@code al} from the current {@code links}.
 *
 * <p>The list is emptied first; then one {@code UrlAndTitle} is appended per
 * link, carrying the link's {@code abs:href} attribute and its text shortened
 * by {@code trim} to a width of 350.
 */
public void initUrlsLinksArrayList() {
    final int maxTitleWidth = 35 * 10;
    al.clear();
    for (Element anchor : links) {
        String href = anchor.attr("abs:href");
        String caption = trim(anchor.text(), maxTitleWidth);

        UrlAndTitle entry = new UrlAndTitle();
        entry.myURL = href;
        entry.title = caption;
        al.add(entry);
    }
}

/**
 * Shortens a string to a fixed width.
 *
 * @param s     text to shorten
 * @param width longest length that is returned untouched
 * @return {@code s} itself when it is no longer than {@code width}; otherwise the
 *         first {@code width - 1} characters followed by a single {@code "."}
 */
private static String trim(String s, int width) {
    boolean fits = s.length() <= width;
    return fits ? s : s.substring(0, width - 1) + ".";
}

/**
 * Creates one {@code SongInfo} for every song-search link in {@code al} and appends
 * it to {@code songInfolList}.
 *
 * <p>A link qualifies when its URL contains the Baidu MP3 search prefix. The text
 * between {@code "&word="} and the next {@code "&lm="} is split on {@code '+'}: the
 * first piece becomes the song name and the second piece, when present, the artist.
 * Any further pieces are ignored. The text is used exactly as it appears in the URL
 * (no URL-decoding is applied). The full URL is stored in {@code sosoList}.
 *
 * <p>Links that lack {@code "&word="}, or have no {@code "&lm="} after it, are skipped
 * instead of raising {@code StringIndexOutOfBoundsException}.
 */
public void initEverySongSoSoPara() {
    String partHare = "http://mp3.baidu.com/m?rf=top-index&tn=baidump";
    String sosoH = "&word=";
    String sosoE = "&lm=";

    for (UrlAndTitle uat : al) {
        String url = uat.myURL;
        if (url == null || !url.contains(partHare)) {
            continue;
        }

        int wordAt = url.indexOf(sosoH);
        if (wordAt < 0) {
            continue;
        }
        int start = wordAt + sosoH.length();
        // Look for the terminator only after the word parameter, so an earlier
        // "&lm=" cannot produce an inverted range.
        int end = url.indexOf(sosoE, start);
        if (end < 0) {
            continue;
        }

        // Split directly on '+' rather than rewriting it to ':' first, so a ':'
        // that is part of the name is not mistaken for the separator.
        String[] parts = url.substring(start, end).split("\\+");

        SongInfo songInfo = new SongInfo();
        // split() yields an empty array when the word consists solely of '+'.
        songInfo.name = parts.length > 0 ? parts[0] : "";
        if (parts.length > 1) {
            songInfo.actor = parts[1];
        }
        songInfo.sosoList = url;
        songInfolList.add(songInfo);
    }
}

/**
 * Prints every entry of {@code songInfolList} to standard output: a 1-based
 * index line, then the song name, artist and search URL, then a blank line.
 * A final line reports how many entries were printed.
 */
public void SongInfoArrayListDisplay() {
    int total = songInfolList.size();
    for (int idx = 0; idx < total; idx++) {
        SongInfo entry = songInfolList.get(idx);
        System.out.println((idx + 1) + ":");
        System.out.println("歌曲名称:" + entry.name);
        System.out.println("艺术家:" + entry.actor);
        System.out.println("网址:" + entry.sosoList);
        System.out.println();
    }
    System.out.println("共有" + total + "个符合结果");
}

/**
 * Collects MP3 URLs for one song into {@code songInfo.mp3downList}.
 *
 * <p>The list is created if it is still {@code null}. Every URL returned by
 * {@code getEveryDownList(songInfo.sosoList)} is passed to {@code getMp3List},
 * whose results are appended; finally {@code delMP3List} is run on the
 * accumulated list.
 *
 * @param songInfo song whose {@code sosoList} URL is read and whose
 *                 {@code mp3downList} is filled
 */
public void fillMp3DownList(SongInfo songInfo) {
    ArrayList<String> collected = songInfo.mp3downList;
    if (collected == null) {
        collected = new ArrayList<String>();
        songInfo.mp3downList = collected;
    }

    for (String pageUrl : getEveryDownList(songInfo.sosoList)) {
        ArrayList<String> found = getMp3List(pageUrl);
        collected.addAll(found);
    }

    delMP3List(collected);
}
/**
 * Prints a song's name and artist, followed by each entry of its
 * {@code mp3downList} on its own line.
 *
 * <p>{@code mp3downList} is declared with a {@code null} default in
 * {@code SongInfo}; when it is still {@code null}, only the name and artist
 * lines are printed instead of failing with a {@code NullPointerException}.
 *
 * @param songInfo the song to print
 */
public void songInfomp3downListDisplay(SongInfo songInfo) {
    System.out.println("歌曲名称:" + songInfo.name);
    System.out.println("艺术家:" + songInfo.actor);
    if (songInfo.mp3downList == null) {
        // The list has not been created yet, so there is nothing to print.
        return;
    }
    for (String string : songInfo.mp3downList) {
        System.out.println(string);
    }
}

/**
 * Removes duplicate entries from {@code mp3List} in place, keeping the first
 * occurrence of each value and preserving the original order.
 *
 * <p>Entries are compared for exact equality. A {@code null} list is left alone.
 *
 * @param mp3List list to de-duplicate; modified in place
 */
public void delMP3List(ArrayList<String> mp3List) {
    if (mp3List == null) {
        return;
    }
    // An insertion-ordered set gives exact-match de-duplication. Checking against
    // a concatenated string would also discard any URL that merely happens to be a
    // substring of an earlier one.
    java.util.LinkedHashSet<String> unique = new java.util.LinkedHashSet<String>(mp3List);
    mp3List.clear();
    mp3List.addAll(unique);
}

// Result buffer for getEveryDownList: replaced with a fresh list on every call
// and appended to by fillEveryDownList.
private ArrayList<String> everyDownList = new ArrayList<String>();

/**
 * Loads {@code url} and returns the matching links found on it.
 *
 * <p>Sets {@code startUrl} to {@code url} and resets {@code everyDownList}. When
 * {@code getUrlContent()} returns {@code true}, the page's links are re-read via
 * {@code initElements()} and {@code initUrlsLinksArrayList()}, and
 * {@code fillEveryDownList()} copies the matching ones into the result.
 *
 * @param url page to load
 * @return the {@code everyDownList} field; empty when {@code getUrlContent()}
 *         returns {@code false} or nothing matched
 */
public ArrayList<String> getEveryDownList(String url) {
    startUrl = url;
    everyDownList = new ArrayList<String>();

    if (!getUrlContent()) {
        return everyDownList;
    }

    initElements();
    initUrlsLinksArrayList();
    fillEveryDownList();
    return everyDownList;
}

/**
 * Appends to {@code everyDownList} the URL of every entry in {@code al} that
 * both starts with the {@code box.zhangmen.baidu.com} prefix and contains the
 * marker {@code "baidusg,"}.
 */
public void fillEveryDownList() {
    final String requiredPrefix = "http://box.zhangmen.baidu.com/m?word=mp3";
    final String requiredMarker = "baidusg,";

    for (UrlAndTitle candidate : al) {
        String href = candidate.myURL;
        if (!href.startsWith(requiredPrefix) || !href.contains(requiredMarker)) {
            continue;
        }
        everyDownList.add(href);
    }
}

/**
 * Loads {@code url} and returns the URLs that {@code fillMP3List} extracts from it.
 *
 * <p>Sets {@code startUrl} to {@code url}; {@code fillMP3List} is invoked only
 * when {@code getUrlContent()} returns {@code true}.
 *
 * @param url page to load
 * @return a new list; empty when {@code getUrlContent()} returns {@code false}
 *         or nothing was extracted
 */
public ArrayList<String> getMp3List(String url) {
    startUrl = url;
    ArrayList<String> found = new ArrayList<String>();
    if (!getUrlContent()) {
        return found;
    }
    fillMP3List(found);
    return found;
}

/**
 * Placeholder with no filtering logic: all three arguments are ignored.
 *
 * @param htmlContent unused
 * @param reg         unused
 * @param i           unused
 * @return always the empty string
 */
private static String getFilteredContent(String htmlContent, String reg, int i) {
    return "";
}


/**
 * Scans the HTML of {@code doc} for {@code http://} / {@code https://} URLs and
 * appends to {@code mp3List} each one that ends with {@code ".mp3"} and does not
 * contain {@code "..."}.
 *
 * <p>The suffix is checked with {@code endsWith}, not {@code contains}, so a URL
 * whose host merely includes {@code ".mp3"} (for example
 * {@code http://list.mp3.baidu.com/top/top500.html}) is not collected as an MP3.
 *
 * <p>The URL pattern accepts only word characters and {@code . - / :} after the
 * scheme, so a match stops at the first character outside that set.
 *
 * @param mp3List list that receives the matching URLs
 */
public void fillMP3List(ArrayList<String> mp3List) {
    String endWith = ".mp3";
    String no = "...";
    Pattern pattern = Pattern.compile("(http://|https://){1}[\\w\\.\\-/:]+");
    Matcher matcher = pattern.matcher(doc.html());
    while (matcher.find()) {
        String url = matcher.group();
        if (url.endsWith(endWith) && !url.contains(no)) {
            mp3List.add(url);
        }
    }
}

/**
 * Currently a stub: passes the HTML of {@code doc} to {@code getFilteredContent}
 * with the pattern {@code "(.*?)"}, discards the result, and returns an empty list.
 *
 * @return a new, empty list
 */
public ArrayList<String> getBaiduSongs() {
    final String lazyAnyPattern = "(.*?)";
    String html = doc.html();
    getFilteredContent(html, lazyAnyPattern, 0); // result intentionally unused, as before
    return new ArrayList<String>();
}

/**
 * Entry point. Builds a {@code MyUrls} for the hard-coded top-500 URL and, if
 * {@code getUrlContent()} returns {@code true}, reads the page's links, derives
 * the song list from them, then fills and prints the MP3 URL list of each song
 * in turn.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String url = "http://list.mp3.baidu.com/top/top500.html";
    MyUrls mu = new MyUrls(url);
    if (!mu.getUrlContent()) {
        return;
    }

    mu.initElements();
    mu.initUrlsLinksArrayList();
    mu.initEverySongSoSoPara();

    int index = 0;
    while (index < mu.songInfolList.size()) {
        SongInfo current = mu.songInfolList.get(index);
        mu.fillMp3DownList(current);
        mu.songInfomp3downListDisplay(current);
        index++;
    }
}

/** Plain holder pairing a link's URL with its display text. */
class UrlAndTitle {
    /** URL of the link. */
    String myURL;
    /** Text shown for the link. */
    String title;
}

/** Plain holder for one song: its name, artist, search URL and collected MP3 URLs. */
class SongInfo {
    /** Song name. */
    String name;
    /** Artist; stays {@code null} unless one is assigned. */
    String actor;
    /** URL of the search page for this song. */
    String sosoList;
    /** Collected MP3 URLs; {@code null} until a list is assigned. */
    ArrayList<String> mp3downList;
}
}

读书人网 >JavaScript

热点推荐