#!/usr/bin/env python
#
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
# and converts it to XML format. It reads exiftool's output from stdin and
# writes the XML to stdout.
#
# In order to get the raw information from exiftool, we need to enable the
# verbose flag (-v2) of exiftool.
#
# Usage:
#      exiftool -v2 img.jpg | ./parser.py >> output.xml
#
#

import os
import re
import sys
from xml.sax.saxutils import escape

# Placeholder written for tags without a usable raw value: sub-directory
# pointers and values that exiftool truncated with "[snip]".
NO_VALUE = "NO_VALUE"

# Finds the following two groups of strings:
#
# 1. tag (groups 1 and 2: the two-line entry and its "| " prefix):
#
# | | | x) name = value
# | | |     - Tag 0x1234
#
# 2. IFD indicator (groups 3 and 4: the line and its "| " prefix):
#
# | | | + [xxx directory with xx entries]
#
_ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))"
    r"\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# Patterns applied to a single two-line tag entry; compiled once so the
# conversion loop does not have to look them up per tag.
_TAG_ID_RE = re.compile(r"0x[0-9a-f]{4}")
_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
_SUBDIR_RE = re.compile(r"\(SubDirectory\) -->")
_PAREN_VALUE_RE = re.compile(r"\(.*\)\n")
_PLAIN_VALUE_RE = re.compile(r"=.*\n")


def _attr(text):
    """Escape *text* for use inside a double-quoted XML attribute."""
    return escape(text, {'"': "&quot;"})


def _raw_value(name, entry):
    """Return the raw value of a two-line tag *entry*, or NO_VALUE.

    Sub-directory pointers have no value.  Otherwise the text inside the
    parentheses that end the first line is preferred, falling back to the
    text after "= " when there are no such parentheses.  The 'Model' tag
    always uses the text after "= ", even if it ends in parentheses.
    """
    if _SUBDIR_RE.search(entry):
        return NO_VALUE

    in_parens = _PAREN_VALUE_RE.search(entry)
    if name != 'Model' and in_parens:
        # Strip the leading "(" and the trailing ")\n".
        return in_parens.group(0)[1:-2]

    # Strip the leading "= " and the trailing "\n".
    value = _PLAIN_VALUE_RE.search(entry).group(0)[2:-1]
    if "[snip]" in value:
        return NO_VALUE
    return value


def exiftool_to_xml(text):
    """Convert the output of ``exiftool -v2`` into an XML document.

    Args:
        text: the verbose exiftool output, as a single string.

    Returns:
        The XML document as a string without a trailing newline: an XML
        declaration, then an <exif> element holding one <tag> element per
        tag found in *text*.  Attribute values and tag values are
        XML-escaped.

    Raises:
        IndexError: if a tag is found while no directory is active, for
            example a tag that precedes every directory indicator.
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]

    layer = 0   # nesting depth of the most recently seen entry
    ifds = []   # stack of the directory names currently open

    for tag, tag_prefix, ifd_line, ifd_prefix in _ENTRY_RE.findall(text):
        if ifd_line:
            # IFD indicator: skip the prefix and the "+ [" that follows it,
            # then take the first word as the directory name.
            prefix_len = len(ifd_prefix)
            ifd = ifd_line[prefix_len + 3:].split()[0]
            # Each nesting level adds one two-character "| " to the prefix.
            # Floor division keeps the layer an integer on Python 2 and 3.
            new_layer = prefix_len // 2 + 1
            if new_layer > layer:
                ifds.append(ifd)
            else:
                for _ in range(layer - new_layer):
                    ifds.pop()
                # A directory at the same depth replaces its predecessor.
                ifds[-1] = ifd
            layer = new_layer
            continue

        new_layer = len(tag_prefix) // 2
        if new_layer < layer:
            # Leaving one or more nested directories.
            for _ in range(layer - new_layer):
                ifds.pop()
        layer = new_layer

        tag_id = _TAG_ID_RE.search(tag).group(0)
        # The match looks like "x) name = "; the name is its second word.
        name = _NAME_RE.search(tag).group(0).split()[1]
        value = _raw_value(name, tag)

        lines.append('    <tag ifd="' + _attr(ifds[-1]) + '" id="'
                     + _attr(tag_id) + '" name="' + _attr(name) + '">'
                     + escape(value) + "</tag>")

    lines.append("</exif>")
    return "\n".join(lines)


def main():
    """Read exiftool output from stdin and write the XML to stdout."""
    sys.stdout.write(exiftool_to_xml(sys.stdin.read()) + "\n")


if __name__ == "__main__":
    main()
91