- import java.io.IOException;
- import java.util.ArrayList;
- import java.util.HashMap;
- import java.util.Iterator;
- import java.util.List;
- import java.util.Map;
- import java.util.Stack;
- import java.util.regex.Matcher;
- import java.util.regex.Pattern;
/**
 * A tiny regex-based tokenizer for a simplified, XML-like markup.
 *
 * <p>Supported input is limited to opening tags with optional unquoted
 * {@code name=value} attributes, plain text directly after an opening tag,
 * and closing tags. Self-closing tags, quoted attribute values, comments,
 * CDATA and entities are not handled.
 */
public class T4 {

    /**
     * Matches the next token: either an opening tag followed by its text, or a
     * closing tag. Group 1 is the opening tag name, group 2 the raw attribute
     * section inside the tag, group 3 the text after the tag, and group 4 the
     * closing tag name. Groups 1-3 are null for a closing tag and group 4 is
     * null for an opening tag.
     */
    private static final Pattern TAG_PATTERN =
            Pattern.compile("<(\\w+)([^<>]*)>([^<]*)|</(\\w+)>");

    /** Matches one unquoted {@code name=value} attribute. */
    private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile("(\\w+)=(\\w+)");

    /** One parsed element. */
    static class Node {
        /** Tag name. */
        String nodename;
        /** Attributes found inside the opening tag, keyed by attribute name. */
        Map<String, String> attributes = new HashMap<String, String>();
        /** Text that directly follows the opening tag (may be empty). */
        String text;
        /** Child elements, in document order. */
        List<Node> childers = new ArrayList<Node>();

        /**
         * Renders the node as
         * {@code tagname:NAME,attribute:[k=v,...],children:[child,...]}.
         * Only the names of the children are printed, and the text is omitted.
         *
         * @return a single-line description of this node
         */
        @Override
        public String toString() {
            StringBuilder builder = new StringBuilder();
            builder.append("tagname:").append(nodename);

            builder.append(",attribute:[");
            if (attributes != null) {
                String separator = "";
                for (Map.Entry<String, String> entry : attributes.entrySet()) {
                    builder.append(separator)
                            .append(entry.getKey())
                            .append('=')
                            .append(entry.getValue());
                    separator = ",";
                }
            }

            builder.append("],children:[");
            if (childers != null) {
                String separator = "";
                for (Node child : childers) {
                    builder.append(separator).append(child.nodename);
                    separator = ",";
                }
            }
            builder.append("]");
            return builder.toString();
        }
    }

    /**
     * Parses the given markup and returns every element that was closed by a
     * closing tag carrying the same name, in the order the elements were closed
     * (so children come before their parent).
     *
     * <p>A closing tag pops the innermost open element; if the names differ the
     * popped element is dropped from the result. A closing tag that appears
     * while no element is open is ignored.
     *
     * @param xml the markup to parse; {@code null} is treated as empty input
     * @return the properly closed elements, never {@code null}
     */
    static List<Node> parse(String xml) {
        List<Node> closedNodes = new ArrayList<Node>();
        if (xml == null) {
            return closedNodes;
        }

        Stack<Node> openNodes = new Stack<Node>();
        Matcher tagMatcher = TAG_PATTERN.matcher(xml);
        while (tagMatcher.find()) {
            String openingName = tagMatcher.group(1);
            if (openingName != null) {
                // Opening tag: record name, attributes and the text that follows.
                Node node = new Node();
                node.nodename = openingName;
                node.text = tagMatcher.group(3);

                // Only scan the part inside the tag so that text such as
                // "k=v" is not mistaken for an attribute.
                Matcher attributeMatcher = ATTRIBUTE_PATTERN.matcher(tagMatcher.group(2));
                while (attributeMatcher.find()) {
                    node.attributes.put(attributeMatcher.group(1), attributeMatcher.group(2));
                }

                // The innermost open element, if any, is the parent.
                if (!openNodes.isEmpty()) {
                    openNodes.peek().childers.add(node);
                }
                openNodes.push(node);
            } else if (!openNodes.isEmpty()) {
                // Closing tag: pop, and keep the element only if the names agree.
                Node node = openNodes.pop();
                if (node.nodename.equals(tagMatcher.group(4))) {
                    closedNodes.add(node);
                }
            }
        }
        return closedNodes;
    }

    /**
     * Parses a small sample document and prints each properly closed element
     * on its own line.
     *
     * @param args unused
     * @throws IOException never thrown by the current implementation; kept for
     *     signature compatibility
     */
    public static void main(String[] args) throws IOException {
        String xml = "<xml a=b c=d e=5>testxml"
                + "<a>testa</a>"
                + "<b>testb</b>"
                + "</xml>";
        for (Node node : parse(xml)) {
            System.out.println(node);
        }
    }
}
// This snippet is from http://www.codesnippet.cn/detail/1208201513422.html
// Source: http://www.codesnippet.cn/detail/1208201513422.html