此博客僅爲學習交流,如觸及第三方利益,請及時聯繫本人刪除
一、前言
看標題大家可能會有點疑惑,爲什麼要寫這個看起來沒什麼作用的爬蟲,兩個音樂軟件換着用不香嗎?
基於此問題,我以我個人感受羅列了網易和QQ音樂以下幾個優缺點:
| | 網易雲 | QQ音樂 |
| --- | --- | --- |
| 優點 | 1.推薦功能 2.用戶評論 3.有很多優秀的原創音樂人入駐 | 1.非常強大的版權庫 2.QQ黏性 |
| 缺點 | 1.版權問題 2.功能越來越雜 | 1.界面花裏胡哨 2.推薦功能有所欠缺 |
我最開始用網易雲的原因就是網易雲的界面功能簡潔,但現在功能太雜,搞得不像是一個音樂軟件。
從用戶角度上來說,我們使用網易雲的目的是爲了聽音樂(當然也有部分是因爲其他原因),良好的評論環境也只能算是錦上添花。
加上前段時間網易發生的暴力裁員事件,不禁使我懷疑網易還能否做出令人印象深刻的佳作。
二、爬蟲實現過程
2.1 登錄後通過歌單請求找到歌曲ID
2.2 使用Postman測試
2.3 拼接歌曲ID(此地址的數據無法通過爬蟲抓取) https://music.163.com/#/song?id=149297
2.4 找到頁面渲染時發起到服務器的歌曲信息請求(請求頁面數據)https://music.163.com/song?id=149297
2.5 使用Postman測試請求
2.6 根據Postman填入的信息 用爬蟲抓取返回的數據
2.7 登錄QQ音樂並找到QQ音樂的搜索功能接口
2.8 找到QQ音樂的歌單接口
2.9 使用Postman測試請求
PS:這裏我猜測騰訊判斷是否登錄 沒有用什麼黑技術的話 那有可能用的是servlet上下文
2.9.1 點開cookie並將cookie值寫入腳本
把這段代碼改爲自己瀏覽器裏的cookie值
成功返回歌單信息
不知道騰訊是定時刷新cookie還是用了什麼可以識別爬蟲僞裝的技術,爬取歌單列表時,cookie裏的部分值可能會變化或者增加幾個值,這樣返回的響應是:{"retcode":1000,"code":1000,"subcode":0,"msg":"no longin"}
由於我不是非常瞭解爬蟲工程,所以我的解決辦法是手動再到網頁獲取(這時候網頁可能會讓你再登錄一次)
2.9.2 接下來找添加歌曲的接口
通過對比各項參數我們得知需要傳入兩個重要參數,mid(歌曲ID)和dirid(歌單ID)
三、代碼實現
考慮到裏邊一些參數涉及到我個人相關,爲了隱私和安全我把參數全都刪掉,有興趣可以根據URL和參數名填寫自己的參數
/**
 * Script entry point: copies the songs of a NetEase Cloud Music playlist into a QQ Music playlist.
 *
 * <p>Flow: fetch the NetEase playlist detail, open each track's song page to read its title,
 * search QQ Music for that title, and add the chosen search result to a QQ Music playlist.
 * The request parameters ({@code params}, {@code encSecKey}) and all cookie values are left
 * blank on purpose and must be replaced with the values of your own account.
 *
 * <p>Failures are reported on the console; this method does not propagate exceptions.
 *
 * @param songNum zero-based index into the QQ Music search results of the song to collect
 */
public static void netEasy(Integer songNum) {
    // A song is only collected when the search returned exactly this many results
    // (the original author's heuristic for "enough search results").
    final int expectedResultCount = 10;
    if (songNum == null || songNum < 0 || songNum >= expectedResultCount) {
        System.err.println("songNum must be between 0 and " + (expectedResultCount - 1) + ", got " + songNum);
        return;
    }

    // NetEase playlist request: fill in the parameters of the playlist you want to crawl.
    String url = "https://music.163.com/weapi/v3/playlist/detail?csrf_token=";
    Map<String, String> requestParams = new HashMap<>();
    requestParams.put("params", "");
    requestParams.put("encSecKey", "");
    try {
        Document responseDocument = Jsoup.connect(url).data(requestParams).post();
        // Parse the response body.
        JSONObject responseJson = JSONObject.parseObject(responseDocument.text());
        JSONObject playlist = responseJson == null ? null : responseJson.getJSONObject("playlist");
        JSONArray trackIds = playlist == null ? null : playlist.getJSONArray("trackIds");
        if (trackIds == null) {
            System.err.println("NetEase playlist response contains no trackIds: " + responseJson);
            return;
        }

        final Map<String, String> cookies = buildQqMusicCookies();
        String songUrl = "https://music.163.com/song?id=";
        // Fetch the user's custom playlists on QQ Music.
        String songLikeList = "https://c.y.qq.com/splcloud/fcgi-bin/songlist_list.fcg?utf8=1";
        String likeList = jsoupByLikeList(songLikeList, cookies);
        JSONObject jsonObject = JSONObject.parseObject(likeList);
        if (jsonObject == null) {
            System.err.println("QQ Music playlist response is empty");
            return;
        }
        System.out.println(jsonObject.toJSONString());

        // The target playlist does not change between songs, so resolve it once.
        // When it is null the songs are still listed and searched, but nothing is added.
        final String dirid = resolveTargetDirId(jsonObject);

        for (Object tv : trackIds) {
            JSONObject trackIdsJson = JSONObject.parseObject(tv.toString());
            String id = trackIdsJson.get("id").toString();
            // NetEase song page: https://music.163.com/song?id=<id>
            Document songDocument = jsoupHtml(songUrl + id);
            if (songDocument == null) {
                // jsoupHtml already reported the failure; move on to the next track.
                continue;
            }
            Elements songNames = getSongName(songDocument);
            songNames.forEach(v -> {
                // data() returns the raw script content, so JSON containing '<' is not truncated.
                JSONObject songJson = JSONObject.parseObject(v.data());
                Object title = songJson == null ? null : songJson.get("title");
                if (title == null) {
                    return;
                }
                // Title of the song in the NetEase playlist.
                String songName = title.toString();
                System.out.println(songName);
                // Look the song up on QQ Music.
                JSONArray jsonArray = qqMusicSearch(songName);
                if (dirid == null || jsonArray == null || jsonArray.size() != expectedResultCount) {
                    return;
                }
                JSONObject thisSong = JSONObject.parseObject(jsonArray.get(songNum).toString());
                Object mid = thisSong.get("mid");
                if (mid == null) {
                    return;
                }
                // Add the song to the target playlist.
                String resopnse = addSongByLikeList(mid.toString(), dirid, cookies);
                System.out.println(resopnse);
            });
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/** Builds the QQ Music cookie map; every value is blank and must be filled in from your own browser. */
private static Map<String, String> buildQqMusicCookies() {
    String[] cookieNames = {
        "AMCV_248F210755B762187F000101%40AdobeOrg", "LW_PsKey", "LW_TS", "LW_pid", "LW_sid", "LW_uid",
        "RK", "__v3_c_last_10685", "__v3_c_review_10685", "__v3_c_visitor", "_ga", "_qpsvr_localtk",
        "eas_sid", "mobileUV", "o_cookie", "pac_uid", "pgv_info", "pgv_pvi", "pgv_pvid", "pgv_si",
        "player_exist", "psrf_access_token_expiresAt", "psrf_musickey_createtime", "psrf_qqaccess_token",
        "psrf_qqopenid", "psrf_qqrefresh_token", "psrf_qqunionid", "ptcz", "ptisp", "ptui_loginuin",
        "qm_keyst", "qqmusic_fromtag", "ts_last", "ts_refer", "ts_uid", "tvfe_boss_uuid", "ue_skey",
        "ue_ts", "ue_uid", "ue_uk", "uin", "userAction", "yplayer_open", "yq_index", "yqq_stat",
        "yq_playdata", "yq_playschange"
    };
    Map<String, String> cookies = new HashMap<>();
    for (String cookieName : cookieNames) {
        cookies.put(cookieName, "");
    }
    return cookies;
}

/** Returns the dirid of the third custom playlist, or null when not logged in or it does not exist. */
private static String resolveTargetDirId(JSONObject songListResponse) {
    // retcode "0" is treated as "logged in"; anything else means the cookies were rejected.
    if (!"0".equals(String.valueOf(songListResponse.get("retcode")))) {
        System.err.println("QQ Music login check failed; songs will not be added");
        return null;
    }
    // Index of the playlist the author created for the NetEase import.
    final int targetIndex = 2;
    Object rawList = songListResponse.get("list");
    JSONArray list = rawList == null ? null : JSONObject.parseArray(rawList.toString());
    if (list == null || list.size() <= targetIndex) {
        System.err.println("QQ Music account has no playlist at index " + targetIndex + "; songs will not be added");
        return null;
    }
    JSONObject songMenu = JSONObject.parseObject(list.get(targetIndex).toString());
    Object dirid = songMenu.get("dirid");
    return dirid == null ? null : dirid.toString();
}
/**
 * Downloads and parses an HTML page with a browser-like User-Agent.
 *
 * <p>Waits one second before the request to reduce the chance of anti-crawler detection.
 *
 * @param url address of the page to fetch
 * @return the parsed document, or {@code null} when the wait was interrupted or the request failed
 */
public static Document jsoupHtml(String url) {
    try {
        // Throttle requests to avoid anti-crawler detection.
        Thread.sleep(1000);
        return Jsoup.connect(url)
                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36")
                .get();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    } catch (Exception e) {
        e.printStackTrace();
    }
    return null;
}
/**
 * Selects the {@code <script type="application/ld+json">} elements of a song page.
 *
 * @param document parsed song page; may be {@code null} when the download failed
 * @return the matching script elements, or an empty collection when {@code document} is {@code null}
 */
public static Elements getSongName(Document document) {
    if (document == null) {
        // jsoupHtml returns null on failure; report "no matches" instead of throwing.
        return new Elements();
    }
    return document.select("script[type=application/ld+json]");
}
/**
 * Searches QQ Music for a song.
 *
 * <p>Waits one second before the request. The keyword is URL-encoded and sent as the
 * {@code w} query parameter.
 * NOTE(review): the author stripped the remaining query parameters from the URL; append
 * whatever your own request needs — confirm against the browser's network tab.
 *
 * @param songName keyword to search for
 * @return the {@code data.song.list} array of the search response
 */
public static JSONArray qqMusicSearch(String songName) {
    try {
        Thread.sleep(1000);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
    }
    String keyword;
    try {
        keyword = java.net.URLEncoder.encode(songName, "UTF-8");
    } catch (java.io.UnsupportedEncodingException e) {
        // UTF-8 is a mandatory charset on every Java platform.
        throw new IllegalStateException("UTF-8 encoding is not available", e);
    }
    // The search keyword goes after "w=".
    String url = "https://c.y.qq.com/soso/fcgi-bin/client_search_cp?w=" + keyword;
    String response = jsoupJson(url);
    JSONObject responseJson = JSONObject.parseObject(response);
    return responseJson.getJSONObject("data").getJSONObject("song").getJSONArray("list");
}
/**
 * Issues a GET request with a browser-like User-Agent and returns the raw response body.
 *
 * <p>The content type is ignored so non-HTML responses are accepted. Any failure while
 * waiting or requesting is printed; the subsequent {@code body()} call then fails with a
 * {@link NullPointerException} because no response is available.
 *
 * @param url address to request
 * @return the response body as text
 */
public static String jsoupJson(String url) {
    final String browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36";
    Connection.Response reply = null;
    try {
        // Pause for a second before each request to avoid anti-crawler detection.
        Thread.sleep(1000);
        Connection request = Jsoup.connect(url)
                .userAgent(browserUserAgent)
                .ignoreContentType(true);
        reply = request.execute();
    } catch (Exception failure) {
        failure.printStackTrace();
    }
    return reply.body();
}
/**
 * Issues a GET request that carries the given cookies and returns the raw response body.
 *
 * <p>The content type is ignored so non-HTML responses are accepted. An {@link IOException}
 * is printed; the subsequent {@code body()} call then fails with a
 * {@link NullPointerException} because no response is available.
 *
 * @param url address to request
 * @param cookies cookies to send with the request
 * @return the response body as text
 */
public static String jsoupByLikeList(String url, Map<String,String> cookies){
    final String browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36";
    Connection.Response reply = null;
    try {
        Connection request = Jsoup.connect(url)
                .userAgent(browserUserAgent)
                .ignoreContentType(true)
                .cookies(cookies);
        reply = request.execute();
    } catch (IOException failure) {
        failure.printStackTrace();
    }
    return reply.body();
}
/**
 * Adds a song to the given QQ Music playlist.
 *
 * <p>Waits one second, then POSTs the form to
 * {@code https://c.y.qq.com/splcloud/fcgi-bin/fcg_music_add2songdir.fcg?g_tk=5381}.
 * Most form fields are blank placeholders to be filled in with your own account's values.
 *
 * @param mid song id, sent as {@code midlist}
 * @param dirid playlist id, sent as {@code dirid}
 * @param cookies cookies to send with the request
 * @return the text of the response, or {@code null} when the request failed with an {@link IOException}
 */
public static String addSongByLikeList(String mid,String dirid,Map<String,String> cookies){
    try {
        // Pause for a second before the request to avoid anti-crawler detection.
        Thread.sleep(1000);
    } catch (InterruptedException interrupted) {
        interrupted.printStackTrace();
    }

    final String endpoint = "https://c.y.qq.com/splcloud/fcgi-bin/fcg_music_add2songdir.fcg?g_tk=5381";
    final String browserUserAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36";

    Map<String,String> form = new HashMap<>();
    form.put("loginUin", "");
    form.put("hostUin", "");
    form.put("format", "json");
    form.put("inCharset", "utf8");
    form.put("outCharset", "utf-8");
    form.put("notice", "0");
    form.put("platform", "");
    form.put("needNewCode", "");
    form.put("uin", "");
    form.put("midlist", mid);
    form.put("typelist", "");
    form.put("dirid", dirid);
    form.put("addtype", "");
    form.put("formsender", "");
    form.put("source", "");
    form.put("r2", "");
    form.put("r3", "");
    form.put("utf8", "");
    form.put("g_tk", "");

    try {
        Document reply = Jsoup.connect(endpoint)
                .ignoreContentType(true)
                .cookies(cookies)
                .data(form)
                .userAgent(browserUserAgent)
                .post();
        return reply.text();
    } catch (IOException failure) {
        failure.printStackTrace();
        return null;
    }
}
/**
 * Runs the playlist import, collecting the search result at index 1 for every song.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    final Integer chosenResult = 1;
    netEasy(chosenResult);
}
{\__/} {\__/}
( ·-·) (·-· )
/ >------------------------------------------------< \
| ☆ |
| ☆ |
| ★ |
| ☆ |
| ☆ |
| |
-------------------------------------