Ugrás a fő tartalomra

XML összehasonlítás Pythonnal


 


Az összevetésre az xmldiff csomagot használjuk.


Példa 1

# Example 1: list the differences between two XML files as edit actions.
#
# Install the dependency first:  conda install xmldiff
#
# A bare `import xmldiff` does not load the `main` submodule, so it is imported
# explicitly; otherwise `xmldiff.main` fails with AttributeError in a fresh
# interpreter.
import xmldiff
import xmldiff.main

# Without a formatter, diff_files() yields the individual edit actions
# (e.g. MoveNode, UpdateTextIn) needed to turn the first file into the second.
# diff_options: 'F' is the node-similarity threshold used for matching and
# 'ratio_mode' selects the speed/accuracy trade-off of the text comparison.
diff1 = xmldiff.main.diff_files(
    r"c:\Users\User\Downloads\kl.xml",
    r"c:\Users\User\Downloads\kl_.xml",
    diff_options={'F': 0.5, 'ratio_mode': 'fast'},
)
for egy in diff1:
    print('Eltérés : ', egy)


Eredmény 1


Eltérés :  MoveNode(node='/annotation/size/width[1]', target='/annotation/size[1]', position=2)
Eltérés :  MoveNode(node='/annotation/object[2]/bndbox[1]', target='/annotation/object[2]', position=2)
Eltérés :  UpdateTextIn(node='/annotation/source/database[1]', text='Unknown2')
Eltérés :  MoveNode(node='/annotation/object[1]/bndbox/ymax[1]', target='/annotation/object[1]/bndbox[1]', position=1)




Példa 2


# Example 2: render the differences as an annotated XML document.
#
# Load both submodules explicitly: a bare `import xmldiff` does not expose
# `xmldiff.main` or `xmldiff.formatting` as attributes.
import xmldiff.formatting
import xmldiff.main

# With an XMLFormatter the result is a single string: the XML document with
# diff:insert / diff:delete markers showing what changed.
diff2 = xmldiff.main.diff_files(
    r'c:\Users\User\Downloads\kl.xml',
    r'c:\Users\User\Downloads\kl_.xml',
    diff_options={'F': 0.5, 'ratio_mode': 'fast'},
    formatter=xmldiff.formatting.XMLFormatter(),
)

print(diff2)


Eredmény 2


<annotation xmlns:diff="http://namespaces.shoobx.com/diff" verified="yes">
	<folder>8bit</folder>
	<filename>xxx.png</filename>
	<path>D:/xxx.png</path>
	<source>
		<database>Unknown<diff:insert>2</diff:insert></database>
	</source>
	<size>
		<width diff:delete="">2656</width>
		<height>5310</height>
		<depth>1</depth>
	<diff:insert>	</diff:insert><width diff:insert="">2656</width>
<diff:delete>	</diff:delete><diff:insert>
</diff:insert>	</size>
	<segmented>0</segmented>
	<object>
		<name>pos_screen_b5</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>128</xmin><diff:insert>		</diff:insert>
			<ymax diff:insert="">842</ymax>
		<diff:insert>		</diff:insert><ymin>672</ymin>
			<xmax>179</xmax>
		<diff:delete>	</diff:delete><ymax diff:delete="">842</ymax>
		</bndbox>
	</object>
	<object>
		<name>pos_screen_b4</name>
		<pose>Unspecified</pose><diff:insert>
</diff:insert>
		<bndbox diff:insert="">
			<xmin>132</xmin>
			<ymin>3550</ymin>
			<xmax>180</xmax>
			<ymax>3735</ymax>
		</bndbox>
	<diff:insert>			</diff:insert><truncated>0</truncated>
		<difficult>0</difficult>
	<diff:delete>	</diff:delete><bndbox diff:delete="">
			<xmin>132</xmin>
			<ymin>3550</ymin>
			<xmax>180</xmax>
			<ymax>3735</ymax>
		</bndbox>
	</object>
	<object>
		<name>pos_screen_b1</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>2450</xmin>
			<ymin>4450</ymin>
			<xmax>2505</xmax>
			<ymax>4631</ymax>
		</bndbox>
	</object>
	<object>
		<name>pos_screen_b2</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>2447</xmin>
			<ymin>3438</ymin>
			<xmax>2507</xmax>
			<ymax>3615</ymax>
		</bndbox>
	</object>
	<object>
		<name>pos_screen_b3</name>
		<pose>Unspecified</pose>
		<truncated>0</truncated>
		<difficult>0</difficult>
		<bndbox>
			<xmin>2456</xmin>
			<ymin>1965</ymin>
			<xmax>2516</xmax>
			<ymax>2142</ymax>
		</bndbox>
	</object>
</annotation>




XML fájlok ciklikus végigolvasása



Minta 3


# Sample 3: find the XML files of a folder in two different ways.
import os
import glob

directory = r'c:\Users\User\Downloads'

# First pass: print the bare file name of every directory entry that ends
# in ".xml".
for file in os.listdir(directory):
    # fsdecode() leaves str names untouched and decodes bytes names.
    filename = os.fsdecode(file)
    if filename.endswith(".xml"):
        print(filename)
        # To drop the ".xml" extension, slice it off: filename[:-4]

print('---------------------')

# Second pass: collect the same files as full paths with a glob pattern.
xml_files = []

try:
    print("Checking: For xml files in folder: " + str(directory))
    xml_files = glob.glob(os.path.join(directory, "*.xml"))
except Exception as e:
    print("Failed to glob xml files. Possible bad regex")
    # Report the underlying error instead of silently discarding it.
    print("Reason: " + str(e))
else:
    print("Checking: Xml files globbed successfully. Counting...")
    print("Xml files located in directory: " + str(len(xml_files)))
print('Eredmény ->')

print(xml_files)

Eredmény 3

academium.pdm.xml
DataTypes.xml
dl_settings.xml
feed.xml
fuzzi_output.xml
kl.xml
kl2.xml
kl_.xml
theme-3045332980529254583.xml
UserSnippets.xml
UserSnippets_20200828.xml
---------------------
Checking: For xml files in folder: c:\Users\User\Downloads
Checking: Xml files globbed successfully. Counting...
Xml files located in directory: 11
Eredmény ->
['c:\\Users\\User\\Downloads\\academium.pdm.xml', 'c:\\Users\\User\\Downloads\\DataTypes.xml', 'c:\\Users\\User\\Downloads\\dl_settings.xml', 'c:\\Users\\User\\Downloads\\feed.xml', 'c:\\Users\\User\\Downloads\\fuzzi_output.xml', 'c:\\Users\\User\\Downloads\\kl.xml', 'c:\\Users\\User\\Downloads\\kl2.xml', 'c:\\Users\\User\\Downloads\\kl_.xml', 'c:\\Users\\User\\Downloads\\theme-3045332980529254583.xml', 'c:\\Users\\User\\Downloads\\UserSnippets.xml', 'c:\\Users\\User\\Downloads\\UserSnippets_20200828.xml']





import xml.etree.ElementTree as ET

# Stream the document incrementally and print each element's tag together
# with the parser event that produced it: 'start' for the opening tag,
# 'end' for the closing tag.
context = ET.iterparse(
    r'c:\Users\User\Documents\data\xml\kl2.xml',
    events=('start', 'end'),
)
for event, elem in context:
    tag = elem.tag
    print(tag, event)

---
import xml.etree.ElementTree as ET

# Load the whole document into memory and take its root element.
root = ET.parse(r"c:\Users\User\Documents\data\xml\kl2.xml").getroot()
# NOTE: stdlib ElementTree elements have no .xpath() method; an XPath
# count() expression needs lxml, where it returns a float.
print(root.tag)

# findall("food") matches only the direct <food> children of the root.
products = root.findall("food")
print(len(products))

# Only inspect the first <food> element when one exists; indexing an empty
# result list would raise IndexError.
if products:
    print(1, products[0].tag)
    for x in products[0]:
        print(2, x.tag, x.text)

----

import lxml.etree

# Parse the file with lxml and let the XPath count() function tally every
# <food> element anywhere in the document.
doc = lxml.etree.parse(
    r"c:\Users\User\Documents\data\xml\kl2.xml"
)
count = doc.xpath(
    'count(//food)'
)
print(count)



Megjegyzések