|
|
@ -77,14 +77,9 @@ class MixTextProcessor():
|
|
|
|
|
|
|
|
|
|
|
|
class DomXml():
|
|
|
|
class DomXml():
|
|
|
|
def __init__(self, xmlstr):
|
|
|
|
def __init__(self, xmlstr):
|
|
|
|
print("Parse xml str:", xmlstr)
|
|
|
|
|
|
|
|
self.tdom = parseString(xmlstr) #Document
|
|
|
|
self.tdom = parseString(xmlstr) #Document
|
|
|
|
# print("tdom:",type(self.tdom))
|
|
|
|
|
|
|
|
self.root = self.tdom.documentElement #Element
|
|
|
|
self.root = self.tdom.documentElement #Element
|
|
|
|
# print("root:",type(self.root))
|
|
|
|
|
|
|
|
self.rnode = self.tdom.childNodes #NodeList
|
|
|
|
self.rnode = self.tdom.childNodes #NodeList
|
|
|
|
# print("rnode:",type(self.rnode))
|
|
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_text(self):
|
|
|
|
def get_text(self):
|
|
|
|
'''返回 xml 内容的所有文本内容的列表'''
|
|
|
|
'''返回 xml 内容的所有文本内容的列表'''
|
|
|
@ -127,7 +122,7 @@ class DomXml():
|
|
|
|
return res
|
|
|
|
return res
|
|
|
|
|
|
|
|
|
|
|
|
def get_pinyins_for_xml(self):
|
|
|
|
def get_pinyins_for_xml(self):
|
|
|
|
'''返回xml 内容,如果字符串 和 拼音的 list , 如 ['''
|
|
|
|
'''返回 xml 内容,字符串和拼音的 list '''
|
|
|
|
res = []
|
|
|
|
res = []
|
|
|
|
|
|
|
|
|
|
|
|
for x1 in self.rnode:
|
|
|
|
for x1 in self.rnode:
|
|
|
|