#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to xml format. It reads exiftool's output from stdin and
# writes the xml format to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
#

import os
import sys
import re
from xml.sax.saxutils import escape

# find the following two groups of string:
#
# 1. tag:
#
# | | | x) name = value
# | | |     - Tag 0x1234
#
# 2. IFD indicator:
#
# | | | + [xxx directory with xx entries]
#
# findall() yields 4-tuples: (tag text, tag "| " prefix,
#                             IFD text, IFD "| " prefix).
_ENTRY_PATTERN = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})" + "|"
    + r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

_XML_HEADER = """<?xml version="1.0" encoding="utf-8"?>"""
_NO_VALUE = "NO_VALUE"

# Attribute values are always emitted inside double quotes, so a literal
# double quote has to be escaped in addition to &, < and >.
_ATTR_ENTITIES = {'"': "&quot;"}


def _pop_levels(ifds, count):
    """Drop the `count` innermost IFD names from the `ifds` stack."""
    for _ in range(count):
        ifds.pop()


def _extract_value(name, entry):
    """Return the raw value of one matched tag entry.

    `entry` is the two-line text matched for a tag. SubDirectory pointers
    and values truncated by exiftool ("[snip]") are reported as NO_VALUE.
    Otherwise the parenthesised text that ends the first line is preferred,
    falling back to everything after "= ".
    """
    if re.search(r"\(SubDirectory\) -->", entry):
        return _NO_VALUE

    in_parens = re.search(r"\(.*\)\n", entry)
    # 'Model' never uses the parenthesised form -- presumably because a
    # model name may itself end with a parenthesised part.
    if name != 'Model' and in_parens:
        # strip the leading "(" and the trailing ")\n"
        value = in_parens.group(0)[1:-2]
    else:
        # strip the leading "= " and the trailing "\n"
        value = re.search(r"=.*\n", entry).group(0)[2:-1]

    if "[snip]" in value:
        return _NO_VALUE
    return value


def convert(text):
    """Convert `exiftool -v2` output into the <exif> XML document.

    Args:
        text: the complete verbose output of exiftool, as one string.

    Returns:
        The XML document as a string terminated by a newline: the XML
        declaration, "<exif>", one "<tag>" line per recognised tag, and
        "</exif>".
    """
    lines = [_XML_HEADER, "<exif>"]

    layer = 0   # nesting depth of the most recently seen entry
    ifds = []   # stack of enclosing IFD names, innermost last

    for tag_text, tag_prefix, ifd_text, ifd_prefix in _ENTRY_PATTERN.findall(text):
        if ifd_text:
            # IFD indicator: every "| " pair in the prefix is one level,
            # and the directory itself opens one more.
            width = len(ifd_prefix)
            # skip the prefix and the "+ [" that follows it
            ifd = ifd_text[width + 3:].split()[0]
            new_layer = width // 2 + 1
            if new_layer > layer:
                ifds.append(ifd)
            else:
                # Same depth or shallower: unwind, then replace the
                # directory that used to sit at this depth.
                _pop_levels(ifds, layer - new_layer)
                ifds[-1] = ifd
            layer = new_layer
            continue

        new_layer = len(tag_prefix) // 2
        if new_layer < layer:
            _pop_levels(ifds, layer - new_layer)
        layer = new_layer

        # find the ID
        tag_id = re.search(r"0x[0-9a-f]{4}", tag_text).group(0)

        # find the name: the second whitespace-separated token of
        # "x) name = " / "x) name (SubDirectory) -->"
        header = re.search(
            r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))", tag_text)
        name = header.group(0).split()[1]

        value = _extract_value(name, tag_text)

        # Escape everything that came from the input so that characters
        # such as & or < cannot break the generated document.
        lines.append(' <tag ifd="' + escape(ifds[-1], _ATTR_ENTITIES)
                     + '" id="' + tag_id
                     + '" name="' + escape(name, _ATTR_ENTITIES) + '">'
                     + escape(value) + "</tag>")

    lines.append("</exif>")
    return "\n".join(lines) + "\n"


def main():
    """Read exiftool's verbose output from stdin and write the XML to stdout."""
    sys.stdout.write(convert(sys.stdin.read()))


if __name__ == "__main__":
    main()