Java 常用操作工具

本文整理了 Java 中常用的幾個工具類:文件操作(讀寫內容)工具、讀寫 properties 文件工具、序列化工具、正則提取工具,以及網頁抓取和解析工具。

  1. package com.shao.utils;

  2. import java.io.BufferedReader;

  3. import java.io.BufferedWriter;

  4. import java.io.File;

  5. import java.io.FileNotFoundException;

  6. import java.io.FileReader;

  7. import java.io.FileWriter;

  8. import java.io.IOException;

  9. import java.util.ArrayList;

  10. import java.util.Iterator;

  11. import java.util.List;

  /**
   * Static helpers for reading and writing text files.
   *
   * <p>Files are opened with {@link FileReader} / {@link FileWriter}, i.e. with the
   * JVM's default charset. I/O failures are reported by printing the stack trace;
   * none of the methods propagate {@link IOException} to the caller.
   */
  public class FileUtil {

      /**
       * Reads a file line by line into a list.
       *
       * @param fileStr path of the file to read
       * @return the lines of the file (without line terminators); {@code null} if
       *         {@code fileStr} is null/empty or the file does not exist, is a
       *         directory, or cannot be opened. If reading fails part-way, the lines
       *         read so far are returned.
       */
      public static List<String> getDataFromFile(String fileStr) {
          if (fileStr == null || fileStr.length() == 0) {
              return null;
          }
          List<String> data = null;
          try {
              File file = new File(fileStr);
              if (!file.exists() || file.isDirectory()) {
                  throw new FileNotFoundException(fileStr);
              }
              BufferedReader br = new BufferedReader(new FileReader(file));
              try {
                  data = new ArrayList<String>();
                  String line;
                  while ((line = br.readLine()) != null) {
                      data.add(line);
                  }
              } finally {
                  // Always release the file handle, even when readLine() fails.
                  br.close();
              }
          } catch (FileNotFoundException e) {
              e.printStackTrace();
          } catch (IOException e) {
              e.printStackTrace();
          }
          return data;
      }

      /**
       * Writes every element of the list to a file, each followed by {@code "\n"}.
       * Any existing file content is replaced. Does nothing when {@code data} is
       * null or empty.
       *
       * @param fileStr path of the file to write
       * @param data    lines to write
       */
      public static void writeDataToFile(String fileStr, List<String> data) {
          if (data == null || data.isEmpty()) {
              return;
          }
          try {
              BufferedWriter bw = new BufferedWriter(new FileWriter(fileStr));
              try {
                  for (String line : data) {
                      bw.write(line + "\n");
                  }
              } finally {
                  // Closing the BufferedWriter flushes it and closes the FileWriter.
                  bw.close();
              }
          } catch (IOException e) {
              e.printStackTrace();
          }
      }

      /**
       * Reads a whole file into a single string.
       *
       * @param fileStr path of the file to read
       * @return the file content with every line terminated by {@code "\n"}; an empty
       *         string if {@code fileStr} is null/empty or the file cannot be read
       */
      public static String getContentFromFile(String fileStr) {
          StringBuilder sb = new StringBuilder();
          if (fileStr == null || fileStr.length() == 0) {
              return sb.toString();
          }
          try {
              File file = new File(fileStr);
              if (!file.exists() || file.isDirectory()) {
                  throw new FileNotFoundException(fileStr);
              }
              BufferedReader br = new BufferedReader(new FileReader(file));
              try {
                  String line;
                  while ((line = br.readLine()) != null) {
                      sb.append(line).append("\n");
                  }
              } finally {
                  br.close();
              }
          } catch (FileNotFoundException e) {
              e.printStackTrace();
          } catch (IOException e) {
              e.printStackTrace();
          }
          return sb.toString();
      }

      /**
       * Writes a string to a file, replacing any existing content.
       *
       * @param fileStr path of the file to write
       * @param content text to write
       */
      public static void writeContentToFile(String fileStr, String content) {
          try {
              BufferedWriter bw = new BufferedWriter(new FileWriter(fileStr));
              try {
                  bw.write(content);
              } finally {
                  bw.close();
              }
          } catch (IOException e) {
              e.printStackTrace();
          }
      }

      /**
       * Recursively reads the content of every file below a directory.
       *
       * @param file the directory to scan
       * @return one string per regular file found (see {@link #getContentFromFile});
       *         {@code null} if {@code file} is null or not a directory
       */
      public static List<String> getContentsFromDir(File file) {
          if (file == null || !file.isDirectory()) {
              return null;
          }
          List<String> cons = new ArrayList<String>();
          File[] files = file.listFiles();
          if (files == null) {
              // listFiles() returns null on an I/O error (e.g. access denied).
              return cons;
          }
          for (File f : files) {
              if (f.isDirectory()) {
                  List<String> nested = getContentsFromDir(f);
                  if (nested != null) {
                      cons.addAll(nested);
                  }
              } else {
                  cons.add(getContentFromFile(f.getAbsolutePath()));
              }
          }
          return cons;
      }
  }


  1. package com.shao.utils;

  2. import java.io.BufferedInputStream;

  3. import java.io.FileInputStream;

  4. import java.io.FileNotFoundException;

  5. import java.io.FileOutputStream;

  6. import java.io.IOException;

  7. import java.io.InputStream;

  8. import java.io.OutputStream;

  9. import java.util.Properties;

  /**
   * Helpers for loading and saving {@code .properties} files.
   *
   * @author heshaopeng
   * @date 2012-11-23
   */
  public class PropertiesUtil {

      /**
       * Loads a properties file.
       *
       * @param file path of the properties file
       * @return the loaded properties, or {@code null} if the file cannot be opened.
       *         If loading fails part-way, the entries read so far are returned.
       */
      public static Properties readPropertiesFile(String file) {
          Properties prop = null;
          try {
              BufferedInputStream is = new BufferedInputStream(new FileInputStream(file));
              try {
                  prop = new Properties();
                  prop.load(is);
              } finally {
                  // Properties.load() leaves the stream open; close it ourselves.
                  is.close();
              }
          } catch (FileNotFoundException e) {
              System.out.println("File Not Found!");
              e.printStackTrace();
          } catch (IOException e) {
              e.printStackTrace();
          }
          return prop;
      }

      /**
       * Stores properties to a file, replacing any existing content.
       *
       * @param filename path of the file to write
       * @param p        properties to store
       */
      public static void writePropertiesFile(String filename, Properties p) {
          try {
              OutputStream outputStream = new FileOutputStream(filename);
              try {
                  p.store(outputStream, "");
              } finally {
                  // Close even when store() throws, so the file handle is not leaked.
                  outputStream.close();
              }
          } catch (IOException e) {
              e.printStackTrace();
          }
      }
  }


  1. package com.shao.utils;

  2. import java.io.BufferedInputStream;

  3. import java.io.FileInputStream;

  4. import java.io.FileNotFoundException;

  5. import java.io.FileOutputStream;

  6. import java.io.IOException;

  7. import java.io.InputStream;

  8. import java.io.OutputStream;

  9. import java.util.Properties;

  /**
   * Helpers for loading and saving {@code .properties} files.
   *
   * @author heshaopeng
   * @date 2012-11-23
   */
  public class PropertiesUtil {

      /**
       * Loads a properties file.
       *
       * @param file path of the properties file
       * @return the loaded properties, or {@code null} if the file cannot be opened.
       *         If loading fails part-way, the entries read so far are returned.
       */
      public static Properties readPropertiesFile(String file) {
          Properties prop = null;
          try {
              BufferedInputStream is = new BufferedInputStream(new FileInputStream(file));
              try {
                  prop = new Properties();
                  prop.load(is);
              } finally {
                  // Properties.load() leaves the stream open; close it ourselves.
                  is.close();
              }
          } catch (FileNotFoundException e) {
              System.out.println("File Not Found!");
              e.printStackTrace();
          } catch (IOException e) {
              e.printStackTrace();
          }
          return prop;
      }

      /**
       * Stores properties to a file, replacing any existing content.
       *
       * @param filename path of the file to write
       * @param p        properties to store
       */
      public static void writePropertiesFile(String filename, Properties p) {
          try {
              OutputStream outputStream = new FileOutputStream(filename);
              try {
                  p.store(outputStream, "");
              } finally {
                  // Close even when store() throws, so the file handle is not leaked.
                  outputStream.close();
              }
          } catch (IOException e) {
              e.printStackTrace();
          }
      }
  }


  1. package com.shao.utils;

  2. import java.util.ArrayList;

  3. import java.util.List;

  4. import java.util.regex.Matcher;

  5. import java.util.regex.Pattern;

  /**
   * Regular-expression helper.
   *
   * @author Administrator
   */
  public class RegxUtil {

      /**
       * Collects every substring of the given text that matches a regular expression.
       *
       * @param content the text to search (e.g. the full content of a web page)
       * @param regex   the regular expression to apply
       * @return all matches in order of occurrence; empty if nothing matches
       */
      public List<String> parseContent(String content, String regex) {
          // Example pattern: regex = "(((?<=(<a))[\\s\\S]*?(?=(</a>))))";
          Matcher matcher = Pattern.compile(regex).matcher(content);
          List<String> matches = new ArrayList<String>();
          for (boolean found = matcher.find(); found; found = matcher.find()) {
              matches.add(matcher.group());
          }
          return matches;
      }
  }


  1. package com.shao.utils;

  2. import java.io.File;

  3. import java.io.FileInputStream;

  4. import java.io.FileOutputStream;

  5. import java.io.InputStream;

  6. import java.io.ObjectInputStream;

  7. import java.io.ObjectOutputStream;

  /**
   * Java-serialization helpers: write an object to a file and read it back.
   *
   * @author heshaopeng
   * @date 2012-11-23
   */
  public class SerializationUtil {

      /** Serialization file name; not referenced by the methods of this class. */
      private final static String SERIALIZATION_PATH = "file2uuid.map";

      /**
       * Serializes an object to a file, replacing the file if it already exists.
       *
       * @param o    the object to serialize
       * @param path path of the target file
       * @throws Exception if the file cannot be written or the object cannot be serialized
       */
      public static void writeObject(Object o, String path) throws Exception {
          File f = new File(path);
          if (f.exists()) {
              f.delete();
          }
          FileOutputStream os = new FileOutputStream(f);
          try {
              ObjectOutputStream oos = new ObjectOutputStream(os);
              oos.writeObject(o);
              // Push buffered object data into the file stream before it is closed.
              oos.flush();
          } finally {
              // Release the file handle even when serialization fails.
              os.close();
          }
      }

      /**
       * Deserializes an object from a file.
       *
       * <p>NOTE(review): Java deserialization can execute code chosen by whoever
       * produced the file; only call this on files from a trusted source.
       *
       * @param path path of the file to read
       * @return the deserialized object, or {@code null} if the file does not exist
       * @throws Exception if the file cannot be read or its content cannot be deserialized
       */
      public static Object readObject(String path) throws Exception {
          File f = new File(path);
          if (!f.exists()) {
              return null;
          }
          InputStream is = new FileInputStream(f);
          try {
              ObjectInputStream ois = new ObjectInputStream(is);
              return ois.readObject();
          } finally {
              // The original never closed the stream; close it on every path.
              is.close();
          }
      }
  }


  1. package com.shao.spider;

  2. import java.io.BufferedReader;

  3. import java.io.IOException;

  4. import java.io.InputStreamReader;

  5. import java.net.MalformedURLException;

  6. import java.net.URL;

  7. import java.util.ArrayList;

  8. import java.util.List;

  9. import java.util.regex.Matcher;

  10. import java.util.regex.Pattern;

  11. import com.shao.utils.FileUtil;

  12. /**

  13. * 抓取網頁

  14. * @author Administrator

  15. *

  16. */

  17. publicclass WebSpider {

  18. /**

  19. * 讀取一個網頁全部內容

  20. */

  21. public String getOneHtml(String htmlurl) throws IOException {

  22. URL url;

  23. String temp;

  24. StringBuffer sb = new StringBuffer();

  25. try {

  26. //1.根據網址,創建一個URL對象

  27. url = new URL(htmlurl);

  28. //2.讀取網頁全部內容,採用utf-8編碼方式來讀取

  29. //url.openStream()獲取輸入流,並用BufferedReader進行封裝

  30. BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream(), "utf-8"));

  31. while ((temp = in.readLine()) != null) {

  32. if(temp.trim().length()>0){

  33. sb.append(temp+"\n");

  34. }

  35. }

  36. in.close();

  37. } catch (MalformedURLException me) {

  38. System.out.println("你輸入的URL格式有問題!請仔細輸入");

  39. me.getMessage();

  40. throw me;

  41. } catch (IOException e) {

  42. e.printStackTrace();

  43. throw e;

  44. }

  45. return sb.toString();

  46. }

  47. /**

  48. * 抓取網頁,並保存爲html文件

  49. * @param htmlURL

  50. */

  51. publicvoid getHtmlFileFromURL(String filename,String htmlURL){

  52. try {

  53. String content = getOneHtml(htmlURL);

  54. List<String> data = new ArrayList<String>();

  55. data.add(content);

  56. FileUtil.writeDataToFile(filename, data);

  57. } catch (IOException e) {

  58. e.printStackTrace();

  59. }

  60. }

  61. /**

  62. * 將網頁內容寫入到文件,按照網頁標題保存文件

  63. * @param htmlURL

  64. */

  65. publicvoid writeHtmlToFile(String path,String content){

  66. String filename = path+getTitle(content)+".html";

  67. FileUtil.writeContentToFile(filename, content);

  68. }

  69. /**

  70. *

  71. * @param s

  72. * @return 獲得網頁標題

  73. */

  74. public String getTitle(String s) {

  75. String regex;

  76. String title = "";

  77. List<String> list = new ArrayList<String>();

  78. regex = "<title>.*?</title>";

  79. list = parseContent(s,regex);

  80. for (int i = 0; i < list.size(); i++) {

  81. title = title + list.get(i);

  82. }

  83. return outTag(title);

  84. }

  85. /**

  86. *

  87. * @param s

  88. * @return 去掉標記

  89. */

  90. public String outTag(final String s) {

  91. return s.replaceAll("<.*?>", "");

  92. }

  93. /**

  94. * 利用正則表達式解析網頁內容

  95. * @param s 全部網頁內容

  96. * @param regex 正則表達式

  97. * @return

  98. */

  99. public List<String> parseContent(String content,String regex) {

  100. //regex = "(((?<=(<a))[\\s\\S]*?(?=(</a>))))";

  101. List<String> list = new ArrayList<String>();

  102. Pattern pa = Pattern.compile(regex);

  103. Matcher ma = pa.matcher(content);

  104. while (ma.find()) {

  105. String ss = ma.group();

  106. list.add(ss);

  107. }

  108. return list;

  109. }

  110. publicstaticvoid main(String[] args) throws IOException {

  111. WebSpider wc = new WebSpider();

  112. String urlhtml = "http://bus.mapbar.com/shenzhen/station_list/A.shtml";

  113. /*//抓取這個網頁 的內容

  114. String con = wc.getOneHtml(urlhtml);

  115. System.out.println(con);*/

  116. /*//將抓取的頁面保存爲html頁面

  117. wc.getHtmlFileFromURL("A.html", urlhtml);*/

  118. /*//按照抓物的網頁的標題保存網頁文件

  119. String con = wc.getOneHtml(urlhtml);

  120. wc.writeHtmlToFile("",con);*/

  121. //============測試批量抓取網頁,並保存爲文件========================

  122. /*String urlhtml2 = "http://bus.mapbar.com/shenzhen/station_list/%%.shtml";

  123. String[] ss = {"A","B","C","D","E","F","G",

  124. "H","I","J","K","L","M","N",

  125. "O","P","Q","R","S","T",

  126. "U","V","W","X","Y","Z",

  127. "1","2","3","4","5","6","7","8","9"};

  128. List<String> htmlContent = new ArrayList<String>();

  129. for (int i = 0; i < ss.length; i++) {

  130. String temp = urlhtml2.replace("%%", ss[i]);

  131. String con = wc.getOneHtml(temp);

  132. wc.writeHtmlToFile("", con);

  133. }*/

  134. /*//從抓取後的網頁文件中讀取網頁內容

  135. String path = "深圳公交站點查詢列表-- A字頭-- Mapbar深圳公交.html";

  136. String content = FileUtil.getContentFromFile(path);

  137. System.out.println(content);*/

  138. // File file = new File("test.txt");

  139. // String path = file.getAbsolutePath();

  140. // System.out.println(path);

  141. //============測試批量抓取網頁,並保存爲文件,到特定目錄下========================

  142. /*String urlhtml2 = "http://bus.mapbar.com/shenzhen/station_list/%%.shtml";

  143. String[] ss = {"A","B","C","D","E","F","G",};

  144. List<String> htmlContent = new ArrayList<String>();

  145. for (int i = 0; i < ss.length; i++) {

  146. String temp = urlhtml2.replace("%%", ss[i]);

  147. String con = wc.getOneHtml(temp);

  148. wc.writeHtmlToFile("G:/test/", con);

  149. }*/

  150. //============從特定目錄下,讀取所有抓取下來的文件內容========================

  151. /*File file = new File("G:/test/");

  152. List<String> cons = FileUtil.getContentsFromDir(file);

  153. System.out.println(cons.size());*/

  154. }

  155. }


  1. package com.shao.spider;

  2. import java.awt.image.BufferedImage;

  3. import java.io.BufferedReader;

  4. import java.io.File;

  5. import java.io.FileOutputStream;

  6. import java.io.IOException;

  7. import java.io.InputStream;

  8. import java.io.InputStreamReader;

  9. import java.io.OutputStream;

  10. import java.net.MalformedURLException;

  11. import java.net.URL;

  12. import javax.imageio.ImageIO;

  /**
   * Downloads arbitrary resources (text, images, media) from a URL to disk.
   *
   * @author Administrator
   */
  public class AllSpider {

      /** Directory used by {@link #getImageFromWeb(String)}. */
      private static final String DEFAULT_TARGET_DIR = "G:/test/";

      /** Size of the copy buffer in bytes. */
      private static final int BUFFER_SIZE = 8192;

      /**
       * Downloads the resource at the given URL into {@code G:/test/}, using the
       * part of the URL after the last {@code '/'} as the file name.
       *
       * @param htmlurl address of the resource
       * @throws IOException if the URL is malformed or the download/write fails
       */
      public void getImageFromWeb(String htmlurl) throws IOException {
          getImageFromWeb(htmlurl, DEFAULT_TARGET_DIR);
      }

      /**
       * Downloads the resource at the given URL into {@code targetDir}, using the
       * part of the URL after the last {@code '/'} as the file name. The bytes are
       * copied unchanged, so this works for any content type.
       *
       * @param htmlurl   address of the resource
       * @param targetDir existing directory to save the file in
       * @throws IOException if the URL is malformed or the download/write fails
       */
      public void getImageFromWeb(String htmlurl, String targetDir) throws IOException {
          try {
              // 1. Build a URL object from the address.
              URL url = new URL(htmlurl);
              String imageName = htmlurl.substring(htmlurl.lastIndexOf("/") + 1);
              File file = new File(targetDir, imageName);

              // 2. Copy the raw byte stream to the file in buffered chunks.
              InputStream is = url.openStream();
              try {
                  OutputStream os = new FileOutputStream(file);
                  try {
                      byte[] buffer = new byte[BUFFER_SIZE];
                      int read;
                      while ((read = is.read(buffer)) != -1) {
                          os.write(buffer, 0, read);
                      }
                  } finally {
                      os.close();
                  }
              } finally {
                  // Close the connection even when the copy fails part-way.
                  is.close();
              }
          } catch (MalformedURLException me) {
              System.out.println("你輸入的URL格式有問題!請仔細輸入");
              throw me;
          } catch (IOException e) {
              e.printStackTrace();
              throw e;
          }
      }

      /**
       * Manual test driver: downloads one image and one audio file.
       */
      public static void main(String[] args) throws IOException {
          AllSpider as = new AllSpider();
          // Plain image download
          String imaurl = "http://a.hiphotos.baidu.com/album/s%3D680%3Bq%3D90/sign=22cf18d6f3d3572c62e29fd4ba28121a/48540923dd54564eba46ab06b2de9c82d1584fe9.jpg";
          as.getImageFromWeb(imaurl);
          // Same method used to download an audio file
          String mp3url = "http://mc.djkk.com/mix/2012/2012-09/2012-09-20/2012092020483953.wma";
          as.getImageFromWeb(mp3url);
      }
  }


發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章