Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/bin/bash
#
# Unpack N*.zip archives, split the contained N*.xml documents into one
# xml2-format fragment file per object id, and reassemble those fragments
# into one XML document per AAC code.
#
# usage: <script> zip_dir dest_dir
#   zip_dir   directory searched (recursively) for N*.zip archives
#   dest_dir  directory the archives are extracted into; all intermediate
#             files and the resulting "${PREFIX}<aac>.xml" files are written
#             here (created if missing)
#
# External tools used: unzip, xml2, 2xml, tidy, awk, and a grep that
# supports -P (PCRE).
#
# Pipeline:
#   1. Extract the archives and flatten every N*.xml with xml2.
#   2. awk writes one file per GM_Point / GM_Curve / GM_Surface / EC01 object,
#      named after the value of its @id line.
#   3. For every EC* file, record which surface, curve and points it uses
#      (file "ar<N>") and group surfaces and EC objects by AAC code
#      (file "aac<code>").
#   4. For every aac* file, concatenate the referenced fragments and turn
#      them back into XML with 2xml | tidy.

# -e: abort on the first failing command; -f: disable pathname expansion.
# Set here rather than on the shebang so they also apply when the script is
# started as "bash script.sh".
set -ef

# Prefix prepended to every generated "<aac>.xml" file name.
PREFIX=''

if (( $# != 2 )); then
  cat >&2 <<USAGE
usage: $0 zip_dir dest_dir
USAGE
  exit 1
fi

zip_dir=$1
dest_dir=$2

# Scratch space for the per-AAC concatenation; removed on every exit path.
tmp_dir=$(mktemp -d)
trap 'rm -rf -- "$tmp_dir"' EXIT
geometry_tmp="${tmp_dir}/geometry"
objects_tmp="${tmp_dir}/objects"

mkdir -p -- "$dest_dir"
find "$zip_dir" -name 'N*.zip' -exec unzip -u '{}' -d "$dest_dir" ';'
cd -- "$dest_dir"

# Step 2: split the flattened xml2 stream into one file per object id.
# Each "$1 ~ .../@id" rule switches the output file f to the id value and
# writes a header path line; the range rule that follows copies the lines
# of that object (including the @id line itself) into f.
# The last rule prints a progress dot every 100000 input lines.
find . -name 'N*.xml' -exec cat '{}' '+' | xml2 |
  awk -F= -v OFS='=' '
    $1 ~ /GM_Point\/@id/ {
      f = $2
      print "/ksj:GI/dataset/ksj:object/ksj:AA01/ksj:OBJ/jps:GM_Point/" > f
    }
    /GM_Point\/@id/, /dimension/ { print > f }

    $1 ~ /GM_Curve\/@id/ {
      f = $2
      print "/ksj:GI/dataset/ksj:object/ksj:AA01/ksj:OBJ/jps:GM_Curve/" > f
    }
    /GM_Curve\/@id/, /GM_OrientableCurve\/jps:GM_OrientablePrimitive\.primitive\/@idref/ { print > f }

    $1 ~ /GM_Surface\/@id/ {
      f = $2
      print "/ksj:GI/dataset/ksj:object/ksj:AA01/ksj:OBJ/jps:GM_Surface/" > f
    }
    /GM_Surface\/@id/, /GM_CompositeCurve\.generator\/@idref/ { print > f }

    $1 ~ /EC01\/@id/ {
      f = $2
      print "/ksj:GI/dataset/ksj:object/ksj:AA01/ksj:OBJ/ksj:EC01/" > f
    }
    /EC01\/@id/, /EC01\/ksj:AAC=/ { print > f }

    { i++; if (i % 100000 == 0) { printf "." } }
  '

# The index files below are appended to, so drop leftovers from earlier runs.
find . -name 'ar*' -delete
find . -name 'aac*' -delete

# Step 3: build the "ar<N>" and "aac<code>" index files.
# The file list is read completely before the loop starts creating files.
typeset -i i=1
mapfile -t ec_files < <(find . -name 'EC*')
for ec in "${ec_files[@]}"; do
  i=$(( i + 1 ))
  # Progress dot once the counter reaches 2^32, then restart the counter.
  # NOTE(review): the threshold is kept as written; it is far larger than
  # any realistic number of EC files - confirm the intended interval.
  if (( i >> 32 > 0 )); then
    printf '.'
    i=1
  fi

  # Surface id ("a<digits>") referenced by this EC object. With -e a missing
  # match aborts the script.
  # NOTE(review): assumes exactly one match per EC file - confirm.
  area=$(grep -Po '(?<=idref=)a\d+' "$ec")
  area_index="ar${area#a}"
  printf '%s\n' "$area" >> "$area_index"

  # AAC code of this EC object; its index lists the surface and the EC file.
  aac=$(grep -Po '(?<=AAC=)\d+' "$ec")
  printf '%s\n' "$area" >> "aac${aac}"
  printf '%s\n' "${ec#./}" >> "aac${aac}"

  # Curve id ("c<digits>") referenced by the surface file.
  curve=$(grep -Po '(?<=idref=_)c\d+' "$area")
  printf '%s\n' "$curve" >> "$area_index"

  # Point ids ("n<digits>") referenced by the curve file, one per line.
  # A curve without point references is not treated as an error.
  grep -Po '(?<=idref=)n\d+' "$curve" >> "$area_index" || true
done

# Step 4: rebuild one XML document per AAC code.
i=1
mapfile -t aac_files < <(find . -name 'aac*')
for aac in "${aac_files[@]}"; do
  i=$(( i + 1 ))
  if (( i % 1000000 == 0 )); then
    printf '.'
  fi

  : > "$geometry_tmp"
  : > "$objects_tmp"

  # Collect the ids listed in the ar<N> file of every surface of this AAC.
  # sed splits "<letter><number>" so sort can order by letter (descending)
  # and then by number, dropping duplicates; tr joins them again. The
  # resulting ids name the fragment files that get concatenated.
  grep -v '^EC' "$aac" | sort -u |
    while IFS= read -r area; do
      cat "ar${area#a}"
    done |
    sed 's/^\([a-z]\)/\1 /' |
    sort -k1.1r -k2.1n -u |
    tr -d ' ' |
    while IFS= read -r dat; do
      cat "$dat" >> "$geometry_tmp"
    done

  # Append the EC object fragments after the geometry.
  while IFS= read -r ec; do
    cat "$ec" >> "$objects_tmp"
  done < <(grep '^EC' "$aac" | sort -u)

  # tidy diagnostics are intentionally discarded.
  cat "$geometry_tmp" "$objects_tmp" |
    2xml |
    tidy -xml -utf8 > "${PREFIX}${aac#./aac}.xml" 2>/dev/null
done
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement