Java實現網絡爬蟲 案例代碼1:獲取天氣預報信息

案例1:獲取天氣預報信息

需求說明
搭建開發環境,實現從“hao123.com”獲取當地天氣預報信息,並在控制檯輸出結果
分析
訪問網址:https://www.hao123.com
分析網站URL、文檔內容特徵
獲取網頁內容
拆分出需求內容
控制檯輸出結果
在這裏插入圖片描述


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Console demo that scrapes a short weather summary (city, today's and
 * tomorrow's conditions and temperatures) from the hao123 home page using
 * regular expressions.
 *
 * <p>The page is processed line by line: every line is tested against each
 * pattern and the first match found on that line is printed. Markup that is
 * split across several lines is therefore not detected.
 */
public class WeatheDemo {

	/** Page fetched when no URL is passed on the command line. */
	private static final String DEFAULT_PAGE_URL = "https://www.hao123.com";

	// The patterns mirror the page markup; capture group 1 is the element text.
	// They are compiled once instead of on every run of main.
	private static final Pattern CITY = Pattern
			.compile("<span class=\"weather2-item\" data-hook=\"name\">(.*?)</span>");
	private static final Pattern INFO_TODAY = Pattern
			.compile("<div class=\"weather2__info-txt\" data-hook=\"weather\" slider-type=\"item\">(.*?)</div>");
	private static final Pattern TEMPERATURE_TODAY = Pattern
			.compile("<div class=\"weather2__temperature\" data-hook=\"tempera\">(.*?)</div>");
	private static final Pattern INFO_TOMORROW = Pattern
			.compile("<div class=\"weather2__info-txt\" data-hook=\"weather-tomorrow\">(.*?)</div>");
	private static final Pattern TEMPERATURE_TOMORROW = Pattern
			.compile("<div class=\"weather2__temperature\" data-hook=\"tempera-tomorrow\">(.*?)</div>");

	/**
	 * Downloads the page and prints every weather field it finds to standard
	 * output, framed by a start and an end banner.
	 *
	 * <p>An I/O failure (bad URL, network error, ...) is reported on the
	 * console and does not propagate; the end banner is printed either way.
	 *
	 * @param args optional; when present, {@code args[0]} is the URL to read
	 *             instead of the default page (for example a {@code file:} URL
	 *             of a saved copy). Without arguments the default page is used.
	 */
	public static void main(String[] args) {
		String pageUrl = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE_URL;

		System.out.println("------------------開始獲取天氣信息-------------------");
		// try-with-resources closes the reader (and the underlying network
		// stream) even when reading fails half-way through the page.
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(
				new URL(pageUrl).openStream(), StandardCharsets.UTF_8))) {
			String line;
			while ((line = reader.readLine()) != null) {
				printFirstMatch("城市:", CITY, line);
				printFirstMatch("今日天氣:", INFO_TODAY, line);
				printFirstMatch("今日溫度:", TEMPERATURE_TODAY, line);
				printFirstMatch("明日天氣:", INFO_TOMORROW, line);
				printFirstMatch("明日溫度:", TEMPERATURE_TOMORROW, line);
			}
		} catch (IOException e) {
			System.out.println("下載失敗");
			e.printStackTrace();
		}
		System.out.println("-----------------完成天氣信息獲取--------------------");
	}

	/**
	 * Prints {@code label} followed by the text captured by group 1 of
	 * {@code pattern} for the first match in {@code line}; prints nothing when
	 * the line does not match.
	 */
	private static void printFirstMatch(String label, Pattern pattern, String line) {
		Matcher matcher = pattern.matcher(line);
		if (matcher.find()) {
			// The capture group already excludes the surrounding tags, so no
			// string replacement is needed to clean the value.
			System.out.println(label + matcher.group(1));
		}
	}
}

運行結果示例:
在這裏插入圖片描述

超全面的測試IT技術課程,0元立即加入學習!有需要的朋友戳:


騰訊課堂測試技術學習地址

歡迎轉載,但未經作者同意必須保留此段聲明,並在文章頁面明顯位置給出原文鏈接。

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章