#!/bin/sh
#
# This script imports HTML and CSS tags from source trees. Supported browsers:
#
# * WebKit
# * Firefox
# * dillo
# * gtkhtml
#
# Usage: <this-script> <source-tree-directory>
#
# The kind of source tree is detected from marker files/directories inside
# the given directory. Each extractor writes one temporary file per category
# (css-properties, html-tags, ...). Non-empty results are then merged,
# sorted and de-duplicated, into <script-dir>/../src/<category>/<source-name>.
#
# Requires grep, cut, sed, awk, sort, xargs, mktemp, plus ruby and perl for
# some of the extractors.

src_dir=$1
tools_dir=$(dirname -- "$0")
dest_dir="$tools_dir/../src"
# Start empty so a value inherited from the environment cannot trigger a merge.
source_name=""

if [ -z "$src_dir" ]; then
  echo "You must define a source directory to examine." >&2
  exit 1
fi

if [ ! -d "$dest_dir" ]; then
  echo "Unable to find htdocs directory. Tried $dest_dir" >&2
  exit 1
fi

# Private scratch directory instead of predictable /tmp/<pid>.* names.
# It is removed on every exit path, including interruption by a signal.
tmp_dir=$(mktemp -d "${TMPDIR:-/tmp}/import-tags.XXXXXX") || exit 1
trap 'rm -rf -- "$tmp_dir"' EXIT
trap 'exit 1' HUP INT TERM
tmp_prefix="$tmp_dir/import"

if [ -d "$src_dir/WebKit" ]; then
  # Tested as of WebKit-r55454
  source_name="webkit"
  webcore="$src_dir/Source/WebCore"
  grep -v "^#" "$webcore/css/CSSPropertyNames.in" > "${tmp_prefix}.css-properties"
  grep -v "^#" "$webcore/css/CSSValueKeywords.in" > "${tmp_prefix}.css-values"
  grep -v "^#" "$webcore/html/HTMLAttributeNames.in" | cut -d" " -f1 \
    | grep -E -v "^namespace\w*=" > "${tmp_prefix}.html-attrs"
  grep -v "^#" "$webcore/html/HTMLTagNames.in" | cut -d" " -f1 \
    | grep -E -v "^namespace\w*=" > "${tmp_prefix}.html-tags"
  grep -E "equalIgnoringCase" "$webcore"/html/HTML*.cpp \
    | ruby -e '$stdin.readlines.join("").scan(/\"([\w-]+)"/) { |tag| puts tag }' \
    > "${tmp_prefix}.html-values"
  grep -r "protocolIs" "$webcore"/* \
    | ruby -e '$stdin.readlines.join("").scan(/\"([\w-]+)"/) { |tag| puts "#{tag}:" }' \
    > "${tmp_prefix}.protocols"
  grep "map->add" "$webcore/html/HTMLInputElement.cpp" | cut -d\" -f2 \
    >> "${tmp_prefix}.html-values"
  grep "AtomicString,.*Header, (" "$webcore/platform/network/ResourceResponseBase.cpp" \
    | cut -d\" -f2 > "${tmp_prefix}.headers"
  grep -o -r 'httpHeaderField(".*"' "$src_dir" | cut -d\" -f2 >> "${tmp_prefix}.headers"
  grep -E -r '"[-\+a-z]+/[-\+a-z]+"' "$webcore" \
    | ruby -e '$stdin.readlines.join("").scan(/\"([afimtvwx][\w\+-]+\/[\w\+-]+)"/) { puts $1 }' \
    > "${tmp_prefix}.mime-types"
  grep DEFINE_STATIC "$webcore/css/CSSSelector.cpp" | cut -d\" -f2 \
    > "${tmp_prefix}.css-pseudo"
  grep -E -o '"@.*?\"' "$webcore/css/CSSParser.cpp" | cut -d\" -f2 | cut -d"{" -f1 \
    | sed 's/ //' > "${tmp_prefix}.css-atrules"
elif [ -d "$src_dir/xpcom" ]; then
  # Tested as of Sep 1 2010
  source_name="mozilla"
  grep "^HTML_.*TAG" "$src_dir/parser/htmlparser/public/nsHTMLTagList.h" \
    | cut -d\( -f2 | cut -d, -f1 | cut -d\) -f1 > "${tmp_prefix}.html-tags"
  # Atom identifiers are mapped back to their string values via the atom
  # list headers. -I already runs grep once per input line, and POSIX makes
  # it mutually exclusive with -n, so -n1 is not passed.
  grep -r "Get.*Attr.*nsGkAtoms" "$src_dir" \
    | perl -ne 'if (/nsGkAtoms::(\w+)/) { print "$1\n" } ' \
    | xargs -I{} grep "({}," "$src_dir/content/base/src/nsGkAtomList.h" \
    | cut -d\" -f2 > "${tmp_prefix}.html-attrs"
  grep "nsHtml5AttributeName.*nsHtml5Atoms::" "$src_dir/parser/html/nsHtml5AttributeName.cpp" \
    | cut -d: -f3 | cut -d\) -f1 | cut -d, -f1 \
    | xargs -I{} grep "({}," "$src_dir/parser/html/nsHtml5AtomList.h" \
    | cut -d\" -f2 >> "${tmp_prefix}.html-attrs"
  grep -E "^ [a-z-]+," "$src_dir/layout/style/nsCSSPropList.h" | cut -d, -f1 \
    | awk '{ print $1 }' > "${tmp_prefix}.css-properties"
  grep 'CSS_KEY(' "$src_dir/layout/style/nsCSSKeywordList.h" | cut -d"(" -f2 \
    | cut -d, -f1 > "${tmp_prefix}.css-values"
  grep -E '{ "[a-z]+:' "$src_dir/docshell/build/nsDocShellModule.cpp" | cut -d\" -f2 \
    > "${tmp_prefix}.protocols"
  grep -r 'aURI->SchemeIs("' "$src_dir"/* | cut -d\" -f2 \
    | perl -ne 'chomp; print "$_:\n";' >> "${tmp_prefix}.protocols"
  grep -r 'uri->SchemeIs("' "$src_dir"/* | cut -d\" -f2 \
    | perl -ne 'chomp; print "$_:\n";' >> "${tmp_prefix}.protocols"
  grep "{ \"" "$src_dir/docshell/base/nsAboutRedirector.cpp" | cut -d\" -f2 \
    | xargs -I{} echo "about:{}" >> "${tmp_prefix}.protocols"
  grep targetScheme.EqualsLiteral "$src_dir/netwerk/base/public/nsNetUtil.h" \
    | cut -d\" -f2 | sed 's/$/:/g' >> "${tmp_prefix}.protocols"
  grep "name.LowerCaseEqualsLiteral" "$src_dir/docshell/base/nsDocShell.cpp" \
    | cut -d\" -f2 >> "${tmp_prefix}.html-values"
  grep -E ' { "[a-z]+' "$src_dir/content/html/content/src/nsGenericHTMLElement.cpp" \
    | cut -d\" -f2 >> "${tmp_prefix}.html-values"
  grep ' { "' "$src_dir/content/html/content/src/nsHTMLInputElement.cpp" \
    | cut -d\" -f2 >> "${tmp_prefix}.html-values"
  grep -r value.LowerCaseEqualsLiteral "$src_dir"/content/base/src/* \
    | cut -d\" -f2 >> "${tmp_prefix}.html-values"
  grep "^HTTP_ATOM" "$src_dir/netwerk/protocol/http/nsHttpAtomList.h" | cut -d\" -f2 \
    | grep '[a-z]' >> "${tmp_prefix}.headers"
  grep -E -r '"[-\+a-z]+/[-\+a-z]+"' \
      "$src_dir/browser/base" "$src_dir/browser/components" "$src_dir/uriloader" \
      "$src_dir/netwerk/mime" "$src_dir/content/html" \
    | ruby -e '$stdin.readlines.join("").scan(/\"([afimtvwx][\w\+-]+\/[\w\+-]+)"/) { puts $1 }' \
    > "${tmp_prefix}.mime-types"
  grep -E -o '":(.*?)"' "$src_dir/layout/style/nsCSSPseudoClassList.h" | cut -d\" -f2 \
    | sed 's/^://' > "${tmp_prefix}.css-pseudo"
  grep AssignLiteral "$src_dir/layout/style/nsCSSRules.cpp" | grep -E -o '"@.*?"' \
    | cut -d\" -f2 | cut -d" " -f1 > "${tmp_prefix}.css-atrules"
elif [ -f "$src_dir/dillorc" ]; then
  # Tested as of dillo 2.2
  source_name="dillo"
  grep '{"' "$src_dir/src/cssparser.cc" | cut -d\" -f2 > "${tmp_prefix}.css-properties"
  grep -E '^ +\"[a-z-]+\", ' "$src_dir/src/cssparser.cc" \
    | ruby -e '$stdin.readlines.join("").scan(/\"(.*?)\"/) { |tag| puts tag }' \
    > "${tmp_prefix}.css-values"
  grep "_get_attr(html" "$src_dir/src/html.cc" | grep '"' | cut -d\" -f2 \
    > "${tmp_prefix}.html-attrs"
  grep 'a_Html_get_attr(html.*"' "$src_dir"/src/*.cc | cut -d\" -f2 \
    >> "${tmp_prefix}.html-attrs"
  grep Html_tag_open_ "$src_dir/src/html.cc" | grep "^ {" | cut -d\" -f2 \
    > "${tmp_prefix}.html-tags"
  grep dStrcasecmp "$src_dir/src/form.cc" "$src_dir/src/html.cc" "$src_dir/src/table.cc" \
    | ruby -e '$stdin.readlines.join("").scan(/\"([-a-z]+)\"/) { |tag| puts tag }' \
    > "${tmp_prefix}.html-values"
  grep -r 'URL_SCHEME.*"[a-z]' "$src_dir" | cut -d \" -f2 \
    | perl -ne 'chomp; print "$_:\n";' > "${tmp_prefix}.protocols"
  grep -r 'header, "' "$src_dir/src/cache.c" | cut -d\" -f2 > "${tmp_prefix}.headers"
  grep -E -r "[-\+a-z]+/[-\+a-z]+" "$src_dir/dpi" "$src_dir/src" \
    | ruby -e '$stdin.readlines.join("").scan(/\"([\w\+-]+\/[\w\+-]+)"/) { puts $1 }' \
    > "${tmp_prefix}.mime-types"
elif [ -d "$src_dir/gtkhtml" ]; then
  # tested as of gtkhtml-3.29.91
  source_name="gtkhtml"
  grep -r "#define ID_" "$src_dir/gtkhtml/htmlengine.c" | cut -d\" -f2 \
    | grep -E '^[a-z]' > "${tmp_prefix}.html-tags"
  grep "html_element_get_attr" "$src_dir"/gtkhtml/*.c | cut -d\" -f2 \
    > "${tmp_prefix}.html-attrs"
  # Append (>>): a plain > here would throw away the html_element_get_attr
  # results gathered by the previous command.
  grep -r "g_ascii_strncasecmp" "$src_dir"/gtkhtml/*.c | cut -d\" -f2 | grep -v ":" \
    | cut -d"=" -f1 | grep "^[a-z]" >> "${tmp_prefix}.html-attrs"
  grep "g_ascii_strncasecmp" "$src_dir/gtkhtml/htmlstyle.c" | cut -d\" -f2 \
    | cut -d" " -f1 | sed 's/://g' > "${tmp_prefix}.css-properties"
  grep "g_ascii_strcasecmp" "$src_dir/gtkhtml/htmlstyle.c" | cut -d\" -f2 \
    > "${tmp_prefix}.css-values"
  grep g_ascii_strcasecmp "$src_dir/gtkhtml/htmlengine.c" \
    | ruby -e '$stdin.readlines.join("").scan(/\"([\/\w-]+)"/) { |tag| puts tag }' \
    > "${tmp_prefix}.html-values"
fi

if [ -n "$source_name" ]; then
  echo "Updating $source_name"
  # We always append, never remove.
  for type in css-properties css-values html-attrs html-tags html-values \
              protocols headers mime-types css-pseudo css-atrules; do
    extracted="${tmp_prefix}.${type}"
    target="$dest_dir/$type/$source_name"

    # This source type has no extractor for the category.
    [ -f "$extracted" ] || continue

    if [ -s "$extracted" ]; then
      echo "- $type"
      mkdir -p -- "$dest_dir/$type"
      # "cat target new | sort -u > target" would truncate target before it
      # is read and lose the existing entries. sort -o may safely name one
      # of its own input files as the output.
      if [ -f "$target" ]; then
        sort -u -o "$target" -- "$target" "$extracted"
      else
        sort -u -o "$target" -- "$extracted"
      fi
    else
      echo "- Unable to parse ${type}, source code is incompatible (skipping)"
    fi
  done
else
  echo "Could not identify the correct source type for $src_dir"
fi