#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to XML format. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
#

import os
import re
import sys
from xml.sax.saxutils import escape

# The scanner below recognises two kinds of records in `exiftool -v2` output.
#
# 1. A tag entry, spanning two lines:
#
#      | | | x) name = value
#      | | |     - Tag 0x1234
#
#    (the first line may instead read `x) name (SubDirectory) -->`).
#
# 2. An IFD header:
#
#      | | | + [xxx directory with xx entries]
#
# Group 1 is the whole tag entry and group 2 its leading "| " bars; group 3 is
# the whole IFD header and group 4 its leading bars. Every "| " is one level
# of nesting. Raw strings are used so that "\|", "\(" etc. reach the regex
# engine verbatim instead of relying on Python passing unknown escapes through.
_RECORD_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# "x) name = " or "x) name (SubDirectory) -->": the second whitespace-separated
# token of this match is the tag name.
_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
_SUBDIR_RE = re.compile(r"\(SubDirectory\) -->")
# Parenthesised text that ends the first line of a tag entry, e.g. "(72/1)".
_RAW_VALUE_RE = re.compile(r"\(.*\)\n")
# Everything from the first "=" to the end of the first line.
_PLAIN_VALUE_RE = re.compile(r"=.*\n")

# Placeholder emitted for sub-directory pointers and truncated values.
_NO_VALUE = "NO_VALUE"


def _attr(text):
    """Escape *text* for use inside a double-quoted XML attribute."""
    return escape(text, {'"': "&quot;"})


def _tag_value(name, entry):
    """Return the value to report for one matched tag entry.

    name  -- the tag name already extracted from *entry*.
    entry -- the full two-line text matched by group 1 of _RECORD_RE.

    Sub-directory pointers yield NO_VALUE. Otherwise, text in parentheses at
    the end of the first line is preferred over the text after "=". The
    'Model' tag is exempt from that rule and always uses the text after "=".
    A value containing "[snip]" is reported as NO_VALUE.
    """
    if _SUBDIR_RE.search(entry):
        return _NO_VALUE

    raw = _RAW_VALUE_RE.search(entry)
    if name != 'Model' and raw:
        # Drop the opening "(" and the closing ")\n".
        return raw.group(0)[1:-2]

    # A non-SubDirectory entry always has " = " on its first line (the record
    # pattern requires it), so this search cannot fail.
    value = _PLAIN_VALUE_RE.search(entry).group(0)[2:-1]
    if "[snip]" in value:
        return _NO_VALUE
    return value


def convert(text):
    """Convert `exiftool -v2` output into the XML document as a string.

    text -- the complete verbose output of exiftool.

    Returns the XML text (one <tag> element per recognised tag entry, wrapped
    in <exif>), terminated by a newline. Attribute values and element text
    are XML-escaped. A tag that has no recognised enclosing directory gets an
    empty ifd attribute.
    """
    out = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]

    # Names of the directories currently open; index i holds nesting depth i+1.
    ifds = []

    for entry, entry_bars, header, header_bars in _RECORD_RE.findall(text):
        if header:
            # A header preceded by k bars opens a directory at depth k + 1.
            # Integer division keeps the depth an int on Python 3 as well.
            depth = len(header_bars) // 2 + 1
            # Skip the bars and the "+ [" that precede the directory name.
            ifd = header[len(header_bars) + 3:].split()[0]
            # Close this depth and anything deeper, then open the new one.
            # Pad if the output skipped levels so depths stay aligned.
            del ifds[depth - 1:]
            ifds.extend([""] * (depth - 1 - len(ifds)))
            ifds.append(ifd)
            continue

        # A tag preceded by k bars belongs to the directory at depth k;
        # close any deeper directories that have ended.
        depth = len(entry_bars) // 2
        del ifds[depth:]
        ifd = ifds[-1] if ifds else ""

        # The record pattern guarantees the entry ends with "0x" plus four hex
        # digits from the "- Tag" line; taking the tail avoids picking up a
        # look-alike that appears inside the value.
        tag_id = entry[-6:]

        name = _NAME_RE.search(entry).group(0).split()[1]
        value = _tag_value(name, entry)

        out.append('    <tag ifd="' + _attr(ifd) + '" id="' + _attr(tag_id)
                   + '" name="' + _attr(name) + '">' + escape(value)
                   + "</tag>")

    out.append("</exif>")
    return "\n".join(out) + "\n"


def main():
    """Read exiftool output from stdin and write the XML to stdout."""
    sys.stdout.write(convert(sys.stdin.read()))


if __name__ == "__main__":
    main()