- package main
- import (
- "fmt"
- "io/ioutil"
- "net/http"
- "regexp"
- "strings"
- )
// Patterns used by stripHTML, compiled once at package initialization.
//
//   - tagRe matches any single tag: "<", the shortest possible run of
//     characters (newlines included), then ">".
//   - styleRe and scriptRe match a whole <style>/<script> element including
//     its content. They are case-sensitive and therefore must be applied
//     after tag names have been lowercased.
//   - spaceRe matches a run of two or more whitespace characters.
var (
	tagRe    = regexp.MustCompile(`<[\S\s]+?>`)
	styleRe  = regexp.MustCompile(`<style[\S\s]+?</style>`)
	scriptRe = regexp.MustCompile(`<script[\S\s]+?</script>`)
	spaceRe  = regexp.MustCompile(`\s{2,}`)
)

// main downloads the page at http://www.CodeSnippet.cn/, strips its HTML
// markup and prints the remaining text to standard output.
func main() {
	resp, err := http.Get("http://www.CodeSnippet.cn/")
	if err != nil {
		fmt.Println("http get error.", err)
		// resp is nil when Get fails; continuing would panic on resp.Body.
		return
	}
	defer resp.Body.Close()

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		fmt.Println("http read error")
		return
	}

	fmt.Println(stripHTML(string(body)))
}

// stripHTML reduces an HTML document to its text content.
//
// Each step operates on the result of the previous one:
//  1. every tag is lowercased so the style/script patterns match regardless
//     of the case used in the document;
//  2. <style> elements are removed together with their content;
//  3. <script> elements are removed together with their content;
//  4. every remaining tag is replaced by a newline;
//  5. runs of two or more whitespace characters collapse to one newline.
//
// Leading and trailing whitespace is trimmed from the returned string.
// This is a regex-based heuristic, not an HTML parser: it does not decode
// entities and can be confused by ">" inside attribute values or comments.
func stripHTML(src string) string {
	src = tagRe.ReplaceAllStringFunc(src, strings.ToLower)
	src = styleRe.ReplaceAllString(src, "")
	src = scriptRe.ReplaceAllString(src, "")
	src = tagRe.ReplaceAllString(src, "\n")
	src = spaceRe.ReplaceAllString(src, "\n")
	return strings.TrimSpace(src)
}
// This snippet is taken from http://www.codesnippet.cn/detail/050320132327.html
// Source: http://www.codesnippet.cn/detail/050320132327.html