使用了 Python 的 xml.etree.ElementTree 库, Python 版本 Python 3.6.6
import json
from xml.etree import ElementTree
# Codes returned by checkxmlchildrentype() to describe an element's children.
# LISTTYPE is deliberately truthy and DICTTYPE falsy.
LISTTYPE = 1  # at least two children share a tag -> children become a list
DICTTYPE = 0  # every child tag is unique -> children become dict keys
def getDictResults(res_dicts, iters):
    """Merge the children of *iters* into ``res_dicts[iters.tag]`` as keys.

    Each child is converted with ``iterxml`` into a shared dict keyed by
    the child's tag, and that dict is then merged into the entry that
    already exists for the parent.  Keys coming from children overwrite
    same-named keys already present on the parent entry.

    Args:
        res_dicts: Dict that already holds a dict under ``iters.tag``.
        iters: The parent XML element whose children are converted.
    """
    result_dicts = {}
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9, while plain iteration works on every Python 3 version.
    for child in iters:
        iterxml(child, result_dicts)
    if result_dicts:
        res_dicts[iters.tag].update(result_dicts)
def getListResults(res_dicts, iters):
    """Store the children of *iters* as a list of single-child dicts.

    Every child is converted with ``iterxml`` into its own fresh dict, and
    the dicts are collected in document order.  If the parent entry
    ``res_dicts[iters.tag]`` is still empty (no attributes, no text) it is
    replaced by the list itself; otherwise the list is stored inside the
    parent entry under the ``"__XmlObjChildren__"`` key.

    Args:
        res_dicts: Dict that already holds a dict under ``iters.tag``.
        iters: The parent XML element whose children are converted.
    """
    result_lists = []
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9, while plain iteration works on every Python 3 version.
    for child in iters:
        child_dict = {}
        iterxml(child, child_dict)
        result_lists.append(child_dict)
    if result_lists:
        if not res_dicts[iters.tag]:
            res_dicts[iters.tag] = result_lists
        else:
            res_dicts[iters.tag]["__XmlObjChildren__"] = result_lists
def checkxmlchildrentype(iters):
    """Classify the direct children of *iters* by tag uniqueness.

    Args:
        iters: The XML element whose direct children are inspected.

    Returns:
        ``DICTTYPE`` when every child has a distinct tag (this includes an
        element with no children), ``LISTTYPE`` when at least two children
        share a tag.
    """
    # Iterate the element directly: Element.getchildren() was removed in
    # Python 3.9, while plain iteration works on every Python 3 version.
    taglist = [child.tag for child in iters]
    if len(set(taglist)) == len(taglist):
        return DICTTYPE
    return LISTTYPE
def getResults(res_dicts, iters):
    """Convert the children of *iters* using the layout matching their tags.

    Children with repeated tags are stored as a list via
    ``getListResults``; children with unique tags are stored as dict keys
    via ``getDictResults``.

    Args:
        res_dicts: Dict that already holds a dict under ``iters.tag``.
        iters: The parent XML element whose children are converted.

    Returns:
        Whatever the selected helper returns.
    """
    if checkxmlchildrentype(iters) == LISTTYPE:
        return getListResults(res_dicts, iters)
    return getDictResults(res_dicts, iters)
def iterxml(iter, res_dicts):
    """Convert the element *iter* (recursively) into ``res_dicts[iter.tag]``.

    The entry starts as a dict of the element's attributes.  Non-blank text
    is added, stripped, under ``"__XmlTagText__"`` (overwriting an
    attribute of the same name).  If the element has children they are
    converted through ``getResults``, which may merge them into the entry
    or replace the entry with a list.

    Args:
        iter: The XML element to convert.
        res_dicts: Dict that receives the result under ``iter.tag``; any
            existing value for that tag is overwritten.
    """
    entry = {}
    res_dicts[iter.tag] = entry
    entry.update(iter.attrib)
    text = iter.text
    if text is not None and text.strip() != "":
        entry["__XmlTagText__"] = text.strip()
    # len(element) is the number of direct children; it replaces
    # Element.getchildren(), which was removed in Python 3.9.
    if len(iter):
        getResults(res_dicts, iter)
def parserxmltojson(file_path):
    """Parse the XML file at *file_path* into nested dicts and lists.

    Despite the name, the result is a Python object, not a JSON string;
    the caller is responsible for serialising it.

    Args:
        file_path: Path (or file object) accepted by ``ElementTree.parse``.

    Returns:
        A dict with a single key, the root element's tag, produced by
        ``iterxml``.  If the file cannot be read or parsed, the error is
        printed and the empty string ``""`` is returned instead.
    """
    try:
        tree = ElementTree.parse(file_path)
    except Exception as e:
        # Deliberately broad: best-effort parsing that reports and gives up.
        # Typical messages seen here and what they mean:
        # - "multi-byte encodings are not supported": switch the file's
        #   character set to utf-8.
        # - "encoding specified in xml declaration is incorrect": the XML
        #   encoding declaration differs from the file's real character set.
        # - "syntax error": malformed XML, garbled characters, etc.
        # - "not well-formed (invalid token)": an editor re-saved the file
        #   as ASCII or similar, or the file's character set does not match
        #   the XML encoding declaration.
        print("Parser {} Error, Errmsg: {}".format(file_path, e))
        return ""
    # ElementTree.parse() either returns a tree or raises, so no None check
    # is needed before reading the root.
    report = {}
    iterxml(tree.getroot(), report)
    return report
if __name__ == "__main__":
    # Convert test.xml and save the result next to it as test.json.
    jsonret = parserxmltojson("test.xml")
    with open("test.json", "w", encoding="utf-8") as fd:
        # parserxmltojson() returns "" on failure; leave the file empty then.
        if jsonret != "":
            # json.dump emits real JSON; str() of a dict would write a Python
            # repr (single quotes) that JSON parsers reject.
            json.dump(jsonret, fd, ensure_ascii=False)
    print(jsonret)
来源: https://www.cnblogs.com/frisk/p/12634427.html