-
Notifications
You must be signed in to change notification settings - Fork 0
/
wikivoyage2osm.sh
executable file
·428 lines (410 loc) · 19.2 KB
/
wikivoyage2osm.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
#!/bin/bash
#
# Wikivoyage2OSM
#
# Extract Wikivoyage Points Of Interest (POI), validate them, and generate OpenStreetMap (OSM) and CSV files.
# Reference: https://en.wikivoyage.org/wiki/Wikivoyage:Listings
# To make URLs clickable in CSV files, search for 'http.*' and replace with '=HYPERLINK("&")' as per https://forum.openoffice.org/en/forum/viewtopic.php?f=9&t=18313#p83972
#
# Usage: ./wikivoyage2osm.sh enwikivoyage-20131130-pages-articles.xml
#
# License: GNU-GPLv3
# Website: https://github.com/nicolas-raoul/wikivoyage2osm
# Tracker: https://github.com/nicolas-raoul/wikivoyage2osm/issues
# Results: https://sourceforge.net/p/wikivoyage
####################################################
# Settings begin
####################################################
# Input dump to process. When the script is started without an argument,
# the unit-test sample rattanakosin.xml is used instead.
DESTINATION="${1:-rattanakosin.xml}"

# Validation switch (YES or NO). When enabled, items that fail validation are
# logged in invalid-* files in the same directory.
VALIDATE="NO"

# Output format switches (each YES or NO): CSV, OSM and RDF files.
GENERATE_CSV="YES"
GENERATE_OSM="NO"
GENERATE_RDF="NO"
####################################################
# Settings end
####################################################
# Constants
# Pieces of the wiki-syntax edit link written in front of every invalid-* log entry:
# PREFIX + page title with underscores + MIDDLE + page title + SUFFIX.
EDIT_PREFIX="[https://en.wikivoyage.org/w/index.php?title="
EDIT_MIDDLE="&action=edit "
EDIT_SUFFIX="]"
# Regular expressions (POSIX extended syntax, used with the [[ =~ ]] operator).
REGEX_TITLE='^<title>'
REGEX_TYPE='^(listing|do|see|buy|drink|eat|sleep)$'
REGEX_PHONE='^\+[-0-9 ]+$' # https://en.wikivoyage.org/wiki/Wikivoyage:Phone_numbers
REGEX_PHONE_STRICT='^\+[0-9 ]+ [-0-9]+$' # https://en.wikivoyage.org/wiki/Wikivoyage:Phone_numbers https://en.wikipedia.org/wiki/List_of_country_calling_codes
REGEX_TOLLFREE='^\+?[-0-9 ]+$' # Same as above but + is not required as toll free is incompatible with country code in some countries
REGEX_TOLLFREE_STRICT='^(\+[0-9 ]+ )?[-0-9]+$'
REGEX_EMAIL_CHAR='[[:alnum:]!#\$%&'\''\*\+/=?^_\`{|}~-]' # http://stackoverflow.com/a/14172402
REGEX_EMAIL="^${REGEX_EMAIL_CHAR}+(\.${REGEX_EMAIL_CHAR}+)*@([[:alnum:]]([[:alnum:]-]*[[:alnum:]])?\.)+[[:alnum:]]([[:alnum:]-]*[[:alnum:]])?$"
REGEX_URL='^((https?|ftp|file):)?//[-A-Za-z0-9\+&@#/%?=~_|!:,.;]+$' # http://stackoverflow.com/a/3184819 plus no-protocol
REGEX_LAT='^[-+]?([1-8]?[0-9](\.[0-9]+)?|90(\.0+)?)$' # http://stackoverflow.com/a/18690202
REGEX_LONG='^[-+]?(180(\.0+)?|((1[0-7][0-9])|([1-9]?[0-9]))(\.[0-9]+)?)$' # http://stackoverflow.com/a/18690202
# A single time: 12-hour clock with AM/PM, 24-hour clock, or the words noon/midnight.
# The AM/PM class is [AP]; the former [A|P] also accepted a literal "|".
REGEX_TIME='(1?[0-9](:[0-9]{2})?[AP]M|[012][0-9]:[0-9]{2}|noon|midnight)'
# Optional day letters (M Tu W Th F Sa Su and ranges such as M-F), then "start-end".
# This must interpolate REGEX_TIME: the previously used ${TIME} is not defined anywhere
# in this script, which left the span as a bare dash.
REGEX_TIMESPAN="([MTWFSahu-]+ )?${REGEX_TIME}[-–]${REGEX_TIME}"
REGEX_HOURS="^(${REGEX_TIMESPAN}(, ${REGEX_TIMESPAN})*|24 hours daily)$" # https://en.wikivoyage.org/wiki/Wikivoyage:Time_and_date_formats
# Check-in and check-out values must be exactly one time.
REGEX_CHECKIN="^${REGEX_TIME}$"
# Initialize output.

#######################################
# Create (or truncate) every output file enabled in the settings and write
# its fixed header lines.
# Globals read:    VALIDATE, GENERATE_CSV, GENERATE_OSM, GENERATE_RDF, DESTINATION
# Globals written: INVALID_* (validation log names), CSV, OSM, RDF
# Redirect targets are quoted so a destination containing whitespace does not
# fail with "ambiguous redirect".
#######################################
init_outputs() {
  local log

  if [[ "$VALIDATE" == "YES" ]]; then
    INVALID_TYPE=invalid-type.log
    INVALID_PHONE=invalid-phone.log
    INVALID_PHONE_STRICT=invalid-phone-strict.log
    INVALID_TOLLFREE=invalid-tollfree.log
    INVALID_TOLLFREE_STRICT=invalid-tollfree-strict.log
    INVALID_EMAIL=invalid-email.log
    INVALID_FAX=invalid-fax.log
    INVALID_FAX_STRICT=invalid-fax-strict.log
    INVALID_URL=invalid-url.log
    INVALID_LATLONG=invalid-latlong.log
    INVALID_HOURS=invalid-hours.log
    INVALID_CHECKINOUT=invalid-checkinout.log
    # Start every validation log empty.
    for log in \
      "$INVALID_TYPE" "$INVALID_PHONE" "$INVALID_PHONE_STRICT" \
      "$INVALID_TOLLFREE" "$INVALID_TOLLFREE_STRICT" "$INVALID_EMAIL" \
      "$INVALID_FAX" "$INVALID_FAX_STRICT" "$INVALID_URL" \
      "$INVALID_LATLONG" "$INVALID_HOURS" "$INVALID_CHECKINOUT"; do
      : > "$log"
    done
  fi

  if [[ "$GENERATE_CSV" == "YES" ]]; then
    CSV=$DESTINATION.csv
    # Column header; one row per POI is appended later.
    echo "TITLE;TYPE;NAME;ALT;ADDRESS;DIRECTIONS;PHONE;TOLLFREE;EMAIL;FAX;URL;HOURS;CHECKIN;CHECKOUT;IMAGE;PRICE;LAT;LON;CONTENT" > "$CSV"
  fi

  if [[ "$GENERATE_OSM" == "YES" ]]; then
    OSM=$DESTINATION.osm
    # Opening of the OSM document; the closing tag is appended at the end of the script.
    {
      echo "<?xml version='1.0' encoding='UTF-8'?>"
      echo "<osm version='0.6' generator='wikivoyage2osm'>"
    } > "$OSM"
  fi

  if [[ "$GENERATE_RDF" == "YES" ]]; then
    RDF=$DESTINATION.rdf
    # Opening of the RDF document; the closing tag is appended at the end of the script.
    {
      echo "<?xml version='1.0'?>"
      echo "<rdf:RDF"
      echo " xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'"
      echo " xmlns:schema='http://schema.org/'>"
    } > "$RDF"
  fi
}

init_outputs
# Transform the data into one POI or title per line.

#######################################
# Flatten a Wikivoyage XML dump and print one line per page title and one
# line per listing template.
# Steps: join all lines; turn "{{flag|..}} {{listing" into "{{legation";
# start a new line at every "{{", "}}", "<title>" and "</title>"; keep only
# title lines and lines opening a listing-like template; drop "{{see also".
# Arguments: $1 - path of the dump file
# Outputs:   the selected lines on stdout
#######################################
extract_poi_lines() {
  # The record separators are written as bracket expressions so that awk
  # reads them as the two-character sequences "{{" and "}}" without any
  # ambiguity about "{" being a repetition operator.
  # The filter uses one extended regex: the template name must be followed
  # by "|" or a space. The former basic-regex list lacked a backslash in two
  # places, so lines starting with "{{listing " or "{{do|" were dropped.
  tr '\n' ' ' < "$1" \
    | sed -e 's/{{flag|[^}]*}}[[:space:]]*{{listing/{{legation/g' \
    | awk -v RS='[{][{]' -v ORS='\n{{' 1 \
    | awk -v RS='[}][}]' -v ORS='\n}}' 1 \
    | awk -v RS='<title>' -v ORS='\n<title>' 1 \
    | awk -v RS='</title>' -v ORS='\n</title>' 1 \
    | grep -E '\{\{(listing|do|see|buy|drink|eat|sleep|legation)[| ]|<title>' \
    | grep -v "{{see also"
}

# The temporary file is kept after the run on purpose: its path is printed
# below so the intermediate lines can be inspected.
POIS=$(mktemp) || { echo "Could not create a temporary file" >&2; exit 1; }
# Resolve the input path; fall back to "readlink -f" where realpath is not installed.
DESTINATION_FILE=$(realpath "$DESTINATION" 2>/dev/null) \
  || DESTINATION_FILE=$(readlink -f "$DESTINATION")
extract_poi_lines "$DESTINATION_FILE" > "$POIS"
echo "POIs written to $POIS"
# Process each line (POI or title).

#######################################
# Print the trimmed value of one attribute of a listing line.
# When the attribute is absent the substitution leaves the line untouched,
# and the "{{" it still contains makes grep drop it, so nothing is printed.
# Arguments:
#   $1 - listing line (still containing its opening "{{")
#   $2 - attribute name, e.g. name or lat
#   $3 - character that ends the value (default "|")
# Outputs: the value on stdout
#######################################
extract_field() {
  local stop="${3:-|}"
  printf '%s\n' "$1" \
    | sed -e "s/.*$2[[:space:]]*=[[:space:]]*\([^${stop}]*\).*/\1/" \
    | grep -v "{{" \
    | sed -e 's/^[[:space:]]*//g' -e 's/[[:space:]]*$//g'
}

# Append one entry to validation log $1: an edit link to the current page
# (globals TITLE and LINK_TITLE) followed by the offending text $2.
log_invalid() {
  echo "# $EDIT_PREFIX$LINK_TITLE$EDIT_MIDDLE$TITLE$EDIT_SUFFIX $2" >> "$1"
}

# Check a phone-like value $1 against loose regex $2, and only if that
# matches against strict regex $3. Failures go to log $4 (loose) or log $5
# (strict). Empty values are not checked.
check_number() {
  local value=$1 loose=$2 strict=$3 loose_log=$4 strict_log=$5
  if [[ -z $value ]]; then
    return 0
  fi
  if ! [[ $value =~ $loose ]]; then
    log_invalid "$loose_log" "$value"
  elif ! [[ $value =~ $strict ]]; then
    log_invalid "$strict_log" "$value"
  fi
}

# Log text $4 to log $3 when value $1 is non-empty and does not match regex $2.
check_field() {
  if [[ -n $1 ]] && ! [[ $1 =~ $2 ]]; then
    log_invalid "$3" "$4"
  fi
}

# Validate the attributes of the current POI (globals TYPE, PHONE, ...)
# against the REGEX_* patterns, logging failures to the INVALID_* files.
validate_poi() {
  # The type is checked even when empty.
  if ! [[ $TYPE =~ $REGEX_TYPE ]]; then
    log_invalid "$INVALID_TYPE" "$TYPE"
  fi
  check_number "$PHONE" "$REGEX_PHONE" "$REGEX_PHONE_STRICT" "$INVALID_PHONE" "$INVALID_PHONE_STRICT"
  check_number "$TOLLFREE" "$REGEX_TOLLFREE" "$REGEX_TOLLFREE_STRICT" "$INVALID_TOLLFREE" "$INVALID_TOLLFREE_STRICT"
  check_field "$EMAIL" "$REGEX_EMAIL" "$INVALID_EMAIL" "$EMAIL"
  # Fax uses the same regexes as phone.
  check_number "$FAX" "$REGEX_PHONE" "$REGEX_PHONE_STRICT" "$INVALID_FAX" "$INVALID_FAX_STRICT"
  check_field "$URL" "$REGEX_URL" "$INVALID_URL" "<nowiki>$URL</nowiki>"
  check_field "$LAT" "$REGEX_LAT" "$INVALID_LATLONG" "(lat) $LAT"
  check_field "$LONG" "$REGEX_LONG" "$INVALID_LATLONG" "(long) $LONG"
  check_field "$HOURS" "$REGEX_HOURS" "$INVALID_HOURS" "$HOURS"
  # Check-out is validated with the check-in regex.
  check_field "$CHECKIN" "$REGEX_CHECKIN" "$INVALID_CHECKINOUT" "(checkin) $CHECKIN"
  check_field "$CHECKOUT" "$REGEX_CHECKIN" "$INVALID_CHECKINOUT" "(checkout) $CHECKOUT"
  # It seems that $IMAGE does not need checking as anything is allowed: https://commons.wikimedia.org/wiki/Commons:Village_pump#Characters_allowed_in_Commons_filenames.3F https://commons.wikimedia.org/wiki/Commons:File_naming
}

# Replace every single quote by a double quote in each variable named in
# the arguments, so the values can sit inside single-quoted XML attributes.
quote_for_xml() {
  local sq="'" dq='"' var
  for var in "$@"; do
    printf -v "$var" '%s' "${!var//$sq/$dq}"
  done
}

# Print an OSM tag with key $1 and value $2; print nothing if $2 is empty.
osm_tag() {
  if [[ -n $2 ]]; then
    echo "<tag k='$1' v='$2'/>"
  fi
}

# Print the OSM node of the current POI on stdout and advance the node
# counter ID. The caller checks that LAT and LONG are present.
write_osm_node() {
  ID=$((ID + 1))
  echo "<node id='$ID' visible='true' lat='$LAT' lon='$LONG' version='1'>"
  case "$TYPE" in
    "listing")
      echo "<tag k='tourism' v='information'/>" # http://wiki.openstreetmap.org/wiki/Key:tourism Unspecified listings are often tourism information, even though not always.
      ;;
    "do")
      echo "<tag k='tourism' v='attraction'/>" # http://wiki.openstreetmap.org/wiki/Key:tourism Must encompass sport activities, cinema, theme parks.
      ;;
    "see")
      echo "<tag k='tourism' v='museum'/>" # http://wiki.openstreetmap.org/wiki/Key:tourism Often museums, the icon also kind of applies to outdoor sights.
      ;;
    "buy")
      echo "<tag k='shop' v='supermarket'/>" # http://wiki.openstreetmap.org/wiki/Key:shop amenity:marketplace could apply too, but the icon for supermarket is much more recognizable.
      ;;
    "drink")
      echo "<tag k='amenity' v='bar'/>" # http://wiki.openstreetmap.org/wiki/Key:amenity
      ;;
    "eat")
      echo "<tag k='amenity' v='restaurant'/>" # http://wiki.openstreetmap.org/wiki/Key:amenity This is OSM's most general type of restaurants.
      ;;
    "sleep")
      # Hotels are tagged under the tourism key; "amenity=hotel" is not an OSM tag.
      echo "<tag k='tourism' v='hotel'/>" # http://wiki.openstreetmap.org/wiki/Key:tourism
      ;;
  esac
  osm_tag "name" "$NAME"
  osm_tag "alt_name" "$ALT"
  osm_tag "addr:full" "$ADDRESS"
  osm_tag "phone" "$PHONE"
  # NOTE(review): the toll-free number reuses the "phone" key, so a node can
  # carry that key twice; confirm that the consumers of this file accept it.
  osm_tag "phone" "$TOLLFREE"
  osm_tag "email" "$EMAIL"
  osm_tag "fax" "$FAX"
  osm_tag "website" "$URL"
  osm_tag "opening_hours" "$HOURS"
  osm_tag "opening_hours:checkin" "$CHECKIN"
  osm_tag "opening_hours:checkout" "$CHECKOUT"
  if [[ -n $IMAGE ]]; then
    osm_tag "image" "https://commons.wikimedia.org/wiki/File:$IMAGE"
  fi
  osm_tag "price" "$PRICE"
  osm_tag "note" "$CONTENT"
  echo "</node>"
}

# Print a schema.org element named $1 with text $2; print nothing if $2 is empty.
rdf_element() {
  if [[ -n $2 ]]; then
    echo "<schema:$1>$2</schema:$1>"
  fi
}

# Print the RDF description of the current POI on stdout.
# Only the "eat" type is mapped so far (to schema.org Restaurant, see
# http://schema.org/FoodEstablishment); every other type prints nothing.
write_rdf_description() {
  if [[ "$TYPE" != "eat" ]]; then
    return 0
  fi
  # Random identifier of the blank node, read from the kernel.
  UUID=$(cat /proc/sys/kernel/random/uuid)
  echo "<rdf:Description rdf:nodeID='N$UUID'>"
  echo "<rdf:type rdf:resource='http://schema.org/Restaurant'/>"
  # The name is written even when empty.
  echo "<schema:name>$NAME</schema:name>"
  rdf_element "alternateName" "$ALT"
  rdf_element "address" "$ADDRESS"
  rdf_element "telephone" "$PHONE"
  rdf_element "telephone" "$TOLLFREE"
  rdf_element "email" "$EMAIL"
  rdf_element "faxNumber" "$FAX"
  #TODO lat/lon
  rdf_element "url" "$URL"
  if [[ -n $IMAGE ]]; then
    rdf_element "image" "https://commons.wikimedia.org/wiki/File:$IMAGE"
  fi
  rdf_element "priceRange" "$PRICE"
  rdf_element "description" "$CONTENT"
  # NOTE(review): unfinished Wikidata lookup. The title in the URL is
  # hard-coded, the response is not parsed and WIKIDATA_ITEM is never set,
  # so this request is repeated for every "eat" POI. Left as found.
  if [[ $WIKIDATA_ITEM == "" ]]; then
    WIKIDATA=$(wget --quiet -O - "http://www.wikidata.org/w/api.php?action=wbgetentities&sites=enwikivoyage&titles=Abbeville&format=xml&props=")
    # if [[ $WIKIDATA == *entity id* ]]; then
    # WIKIDATA_ITEM=`echo $WIKIDATA | sed -e "s/.*id=\"//" | sed -e "s/\".*//"`
    # else
    # WIKIDATA_ITEM="NONE"
    # fi
  fi
  if [[ $WIKIDATA_ITEM != "NONE" ]]; then
    # TODO: write the link to the Wikidata item here. The placeholder that
    # stood here invoked a command named "TODO", which only produced a
    # "command not found" error and wrote nothing to the RDF file.
    :
  fi
  echo "</rdf:Description>"
}

#######################################
# Read the title/POI lines from file $1 and, depending on the settings,
# validate each POI and append it to the CSV, OSM and RDF outputs.
# Globals read:    VALIDATE, GENERATE_CSV, GENERATE_OSM, GENERATE_RDF,
#                  REGEX_*, EDIT_*, INVALID_*, CSV, OSM, RDF
# Globals written: TITLE, LINK_TITLE, ID, WIKIDATA_ITEM and one variable per
#                  listing attribute (TYPE, NAME, ... CONTENT)
# Outputs:         each page title on stdout, as a progress indicator
#######################################
process_pois() {
  WIKIDATA_ITEM=""
  ID=0
  # -r keeps backslashes found in the wiki text intact.
  while read -r LINE; do
    if [[ "$LINE" =~ $REGEX_TITLE ]]; then
      # This line is a destination
      TITLE=$(printf '%s\n' "$LINE" | sed -e "s/<title>//g" -e "s/<\/title>//g")
      echo "$TITLE"
      LINK_TITLE=${TITLE// /_}
      WIKIDATA_ITEM="" # Reset Wikidata item, will be searched at first POI
      continue
    fi

    # This line is a POI.
    # Extract all data from the Wikivoyage POI listing.
    # The type is the text between "{{" and the first "|".
    TYPE=$(printf '%s\n' "$LINE" \
      | sed -e "s/.*{{[[:space:]]*\([^|]*\).*/\1/" \
      | grep -v "{{" \
      | sed -e 's/^[[:space:]]*//g' -e 's/[[:space:]]*$//g')
    NAME=$(extract_field "$LINE" name)
    ALT=$(extract_field "$LINE" alt)
    ADDRESS=$(extract_field "$LINE" address)
    DIRECTIONS=$(extract_field "$LINE" directions)
    PHONE=$(extract_field "$LINE" phone)
    TOLLFREE=$(extract_field "$LINE" tollfree)
    # Remove Left-to-Right mark character as it is implicit (first occurrence only).
    PHONE=${PHONE/$'\xe2\x80\x8e'/}
    TOLLFREE=${TOLLFREE/$'\xe2\x80\x8e'/}
    EMAIL=$(extract_field "$LINE" email)
    FAX=$(extract_field "$LINE" fax)
    URL=$(extract_field "$LINE" url)
    HOURS=$(extract_field "$LINE" hours)
    CHECKIN=$(extract_field "$LINE" checkin)
    CHECKOUT=$(extract_field "$LINE" checkout)
    IMAGE=$(extract_field "$LINE" image)
    PRICE=$(extract_field "$LINE" price)
    LAT=$(extract_field "$LINE" lat)
    LONG=$(extract_field "$LINE" long)
    # The content runs up to the next "}" instead of the next "|".
    CONTENT=$(extract_field "$LINE" content '}')

    # Check attributes validity
    if [[ "$VALIDATE" == "YES" ]]; then
      validate_poi
    fi

    if [[ "$GENERATE_CSV" == "YES" ]]; then
      CSVLINE="\"$TITLE\";\"$TYPE\";\"$NAME\";\"$ALT\";\"$ADDRESS\";\"$DIRECTIONS\";\"$PHONE\";\"$TOLLFREE\";\"$EMAIL\";\"$FAX\";\"$URL\";\"$HOURS\";\"$CHECKIN\";\"$CHECKOUT\";\"$IMAGE\";\"$PRICE\";\"$LAT\";\"$LONG\";\"$CONTENT\""
      # Unescape &amp; back to & because no need to escape ampersands in CSV.
      printf '%s\n' "$CSVLINE" | sed -e "s/&amp;/\&/g" >> "$CSV"
    fi

    if [[ "$GENERATE_OSM" == "YES" ]] || [[ "$GENERATE_RDF" == "YES" ]]; then
      # Replace single quotes in values so that they can be used as XML attribute values.
      quote_for_xml LAT LONG NAME ALT ADDRESS PHONE TOLLFREE EMAIL FAX URL \
        HOURS CHECKIN CHECKOUT IMAGE PRICE CONTENT
    fi

    # Output to OSM file if latitude/longitude present.
    if [[ "$GENERATE_OSM" == "YES" ]] && [[ -n $LAT ]] && [[ -n $LONG ]]; then
      write_osm_node >> "$OSM"
    fi

    if [[ "$GENERATE_RDF" == "YES" ]]; then
      write_rdf_description >> "$RDF"
    fi
  done < "$1"
}

process_pois "$POIS"
#######################################
# Append the closing root tag to each XML output enabled in the settings.
# Globals read: GENERATE_OSM, GENERATE_RDF, OSM, RDF
# The file names are quoted so a destination containing whitespace does not
# fail with "ambiguous redirect".
#######################################
finalize_outputs() {
  if [[ "$GENERATE_OSM" == "YES" ]]; then
    echo "</osm>" >> "$OSM"
  fi
  if [[ "$GENERATE_RDF" == "YES" ]]; then
    echo "</rdf:RDF>" >> "$RDF"
  fi
}

finalize_outputs