- package com.sancai;
- import java.io.*;
- import java.net.HttpURLConnection;
- import java.net.URL;
- import java.net.URLConnection;
- import java.text.DateFormat;
- import java.text.ParseException;
- import java.text.SimpleDateFormat;
- import java.util.*;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
- /**
- * 分析html
- */
- public class SynHtml{
- /**
- * 常用的格式化日期
- *
- * @param date Date
- * @return String
- */
- public static String formatDate(Date date){
- String result = "";
- if(date != null){
- try{
- DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
- result = sdf.format(date);
- }
- catch(Exception ex){
- ex.printStackTrace();
- }
- }
- return result;
- }
- public static void main(String args[]) throws IOException, ParseException{
- // songstate上是按周发音乐的,日期不要设的太靠前,否则下的音乐很多
- String datestr = "2008-07-28";
- DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
- Date date = sdf.parse(datestr);
- while(new Date().after(date)){
- String listUrl = "http://www.songtaste.com/music.php?tag=chart&dt=" + formatDate(date);
- String songUrl = "http://www.songtaste.com/playmusic.php?song_id=";
- List<Song> songList = getSongList(listUrl, songUrl);
- for(Song song : songList){
- // 保存路径
- downloadFile(song.getUrl(), "G:/music/", song.getName() + ".mp3");
- }
- Calendar cal = Calendar.getInstance();
- cal.setTime(date);
- // 加一周
- cal.add(Calendar.DATE, 7);
- date = cal.getTime();
- }
- }
- /**
- * 获取请求的Url的页面
- *
- * @param listUrl url
- * @param songUrl url
- * @return html
- */
- public static List<Song> getSongList(String listUrl, String songUrl){
- String regEx = "[`~!%*|':;'\\",\\\\[\\\\]./?~*|]";
- String hoHtml = getHtml(listUrl);
- hoHtml = hoHtml.substring(hoHtml.indexOf("<script>WL") + 8, hoHtml.indexOf("song_fun_btn") - 29);
- String[] songs = hoHtml.split("WL");
- List<Song> songList = new ArrayList<Song>();
- for(String item : songs){
- String[] song = item.split(",");
- if(song.length > 3){
- Pattern p = Pattern.compile(regEx);
- Matcher m = p.matcher(song[2]);
- songList.add(new Song(song[1].replace('"', ' ').trim(), m.replaceAll("").trim(), ""));
- }
- }
- String tem = "";
- for(Song song : songList){
- tem += song.getId() + ",";
- }
- if(tem.endsWith(",")){
- tem = tem.substring(0, tem.length() - 1);
- }
- Map<String, String> songUrlMap = getSongUrl(getHtml(songUrl + tem));
- for(Song song : songList){
- song.setUrl(songUrlMap.get(song.getId()));
- }
- return songList;
- }
- /**
- * 获取音乐的url
- *
- * @param html html
- * @return html
- */
- public static Map<String, String> getSongUrl(String html){
- html = html.substring(html.indexOf("<script>WrtSongLine(") + 8, html.length());
- html = html.substring(0, html.indexOf("</script>") - 29 + 8);
- String[] songs = html.split("WrtSongLine");
- Map<String, String> songUrlMap = new HashMap<String, String>();
- for(String item : songs){
- String[] song = item.split(",");
- if(song.length > 3){
- songUrlMap.put(song[0].replace('"', ' ').replace('(', ' ').trim(), song[5].replace('"', ' ').trim());
- }
- }
- return songUrlMap;
- }
- /**
- * 获取请求的Url的页面
- *
- * @param urlString url
- * @return html
- */
- public static String getHtml(String urlString){
- String res = "";
- try{
- URL url = new URL(urlString);
- HttpURLConnection conn = (HttpURLConnection)url.openConnection();
- conn.setDoOutput(true);
- conn.setRequestMethod("POST");
- Reader reader = new InputStreamReader(conn.getInputStream(), "GBK");
- java.io.BufferedReader in = new java.io.BufferedReader(reader);
- String line;
- while((line = in.readLine()) != null){
- res += line;
- }
- in.close();
- }
- catch(Exception e){
- System.out.println("error in wapaction,and e is " + e.getMessage());
- }
- return res;
- }
- public static void downloadFile(String urlS, String filePath, String fileFullName){
- try{
- URL theURL = new URL(urlS);
- URLConnection con = theURL.openConnection();
- byte[] buffer = new byte[4 * 1024];
- int read;
- String path = filePath + "/" + fileFullName;
- File fileFolder = new File(filePath);
- if(!fileFolder.exists()){
- if(fileFolder.mkdir()){
- System.out.println("新建文件夹失败》》》》》》》》》》》》》》》》》");
- }
- }
- InputStream in = null;
- in = con.getInputStream();
- FileOutputStream os = new FileOutputStream(path);
- while((read = in.read(buffer)) > 0){
- os.write(buffer, 0, read);
- }
- System.out.println("下载中了一首了。。。");
- os.close();
- in.close();
- }
- catch(IOException e){
- e.printStackTrace();
- }
- }
- }
// This snippet comes from http://www.codesnippet.cn/detail/2610201513911.html
// Source: http://www.codesnippet.cn/detail/2610201513911.html