#!/usr/bin/env python
#
# This parser parses the output of Phil Harvey's exiftool (version 9.02)
# and converts it to XML. It reads exiftool's output from stdin and writes
# the XML to stdout.
#
# In order to get the raw information from exiftool, the verbose flag (-v2)
# of exiftool must be enabled.
#
# Usage:
#     exiftool -v2 img.jpg | ./parser.py >> output.xml
#
# The code is written to run unchanged under both Python 2 and Python 3.
#

import os
import re
import sys
from xml.sax.saxutils import escape

# Placeholder emitted for tags that have no printable value (sub-directory
# pointers and values that exiftool abbreviated with "[snip]").
NO_VALUE = "NO_VALUE"

# Matches one of the following two kinds of records:
#
# 1. tag (two consecutive lines):
#
#    | | | x) name = value
#    | | |     - Tag 0x1234
#
#    group 1 = the whole two-line record (ending right after the tag id)
#    group 2 = the leading "| " bars (two characters per nesting level)
#
# 2. IFD indicator (one line):
#
#    | | | + [xxx directory with xx entries]
#
#    group 3 = the whole indicator
#    group 4 = the leading "| " bars
#
_ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))"
    r"\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# "x) name = " or "x) name (SubDirectory) -->" at the start of a tag record.
_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
# Marks a tag that only points at a nested directory.
_SUBDIR_RE = re.compile(r"\(SubDirectory\) -->")
# Raw value printed in parentheses at the end of the first line.
_RAW_VALUE_RE = re.compile(r"\(.*\)\n")
# Displayed value: everything after "=" on the first line.
_SHOWN_VALUE_RE = re.compile(r"=.*\n")

# Width of the "0x1234" tag id that terminates every tag record.
_ID_WIDTH = 6


def _xml_escape(text):
    """Escape text for XML character data or a double-quoted attribute."""
    return escape(text, {'"': "&quot;"})


def _tag_value(name, record):
    """Return the value to emit for the tag called name.

    record is the two-line tag record matched by _ENTRY_RE. The raw value in
    the trailing parentheses is preferred; otherwise the displayed value
    after "=" is used. Sub-directory pointers and "[snip]"-abbreviated values
    yield NO_VALUE.
    """
    if _SUBDIR_RE.search(record):
        return NO_VALUE

    raw = _RAW_VALUE_RE.search(record)
    # 'Model' is deliberately excluded from the parenthesis rule (presumably
    # because model strings may themselves end in a parenthesised part).
    if raw and name != 'Model':
        # Strip the opening "(" and the closing ")\n".
        return raw.group(0)[1:-2]

    # Strip the leading "= " and the trailing newline.
    shown = _SHOWN_VALUE_RE.search(record).group(0)[2:-1]
    if "[snip]" in shown:
        return NO_VALUE
    return shown


def convert(text):
    """Convert exiftool -v2 output into XML.

    text is the complete verbose output of exiftool. The return value is the
    list of output lines (without line terminators): the XML declaration,
    "<exif>", one "<tag>" element per recognised tag, and "</exif>".

    Raises IndexError if a tag record appears before any IFD indicator.
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]

    # Stack of the IFD names enclosing the current position, innermost last.
    ifds = []

    for match in _ENTRY_RE.finditer(text):
        record, record_bars, indicator, indicator_bars = match.groups("")

        if indicator:
            # A directory opened behind k bars lives at nesting depth k + 1.
            depth = len(indicator_bars) // 2 + 1
            # Skip the bars and the "+ [" prefix; the first word is the name.
            ifd = indicator[len(indicator_bars) + 3:].split()[0]
            # Leave deeper or sibling directories, then enter this one.
            del ifds[depth - 1:]
            ifds.append(ifd)
            continue

        # A tag behind k bars belongs to the directory at depth k.
        depth = len(record_bars) // 2
        del ifds[depth:]

        # The record always ends with the id from the "- Tag 0x...." line.
        # Taking it from the end avoids picking up a hex-looking value
        # such as "= 0x0220" from the first line.
        tag_id = record[-_ID_WIDTH:]
        name = _NAME_RE.search(record).group(0).split()[1]
        value = _tag_value(name, record)

        lines.append(' <tag ifd="' + _xml_escape(ifds[-1]) + '" id="'
                     + tag_id + '" name="' + _xml_escape(name) + '">'
                     + _xml_escape(value) + "</tag>")

    lines.append("</exif>")
    return lines


def main():
    """Read exiftool -v2 output from stdin and print the XML to stdout."""
    for line in convert(sys.stdin.read()):
        # Single-argument call form prints identically on Python 2 and 3.
        print(line)


if __name__ == "__main__":
    main()