#!/usr/bin/python

# thesaurus.py CONVERTS THESAURUS LIST INTO RDF, tokens.html, labels.html, aliases.html, AND PER-LETTER dict/[A-Z].html FILES

# Version 2007-NOV-27
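
# INPUT  : <root>.txt             HIERARCHICAL TERM LIST (SEE THE FORMAT NOTE BEFORE THE PARSER BELOW)
# OUTPUT : <root>.rdf             SKOS/RDF VERSION OF THE THESAURUS
#          tokens.html            ALPHABETICAL LIST OF TOKENS
#          labels.html            ALPHABETICAL LIST OF LABELS
#          aliases.html           ALPHABETICAL LIST OF ALIASES (ALT/UF ENTRIES)
#          dict/A.html .. Z.html  ONE DICTIONARY PAGE PER INITIAL LETTER
#          newest.txt             RE-GENERATED, NORMALIZED COPY OF THE INPUT LIST
#          NUMBER_OF_ENTRIES, NUMBER_OF_TOP_ENTRIES   SIMPLE STATISTICS FILES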

import sys
import time
NCOLS = 3
MCOLS = 5

root = 'IVOAT'
path = 'http://www.Astro.physik.Uni-Goettingen.DE/~hessman/rdf/'

# ----- START OF FUNCTIONS

# CONVERTS TOKEN TO KEY (FOR ALPHABETIZATION)
def keyify (str) :
    s = str.replace('"','')
    return s.lower()+s
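# E.G. keyify('Stars') RETURNS 'starsStars' : THE LOWER-CASED COPY IN FRONT MAKES SORTING
# CASE-INSENSITIVE FIRST AND CASE-SENSITIVE SECOND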
# CONVERTS TOKEN TO STRING
def untokenize (str) :
    key = keyify(str)
    if key in dictionary :
        entry = dictionary[key]
        return entry['LABEL']
    else :
        print 'Why do we have to untokenize',str,'?'
        return str
# CONVERTS STRING TO TOKEN USING alternatingCapitalizationOfWordsInTheString
def tokenize (str) :
    lastlower = False
    s = str.replace('"','').strip().replace('\t',' ').replace('+',' ').replace('[',' ').replace(']',' ').replace('-',' ').replace('_',' ').replace('.',' ')
    tok = ''
    caps = False
    first = True
    for c in s :
        if c == ' ' :
            caps = True
        elif first :
            tok += c
        elif caps == True :
            tok += c.upper()
            caps = False
        else :
            if lastlower and c.isupper() :
                tok += c
            else :
                tok += c.lower()
            caps = False
        if c.isalpha() and c.islower() :
            lastlower = True
        else :
            lastlower = False
        first = False
    return tok
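# E.G. tokenize('active galactic nuclei') RETURNS 'activeGalacticNuclei' : TABS AND MOST PUNCTUATION
# ARE TURNED INTO SPACES, EACH WORD AFTER THE FIRST IS CAPITALIZED, THE FIRST CHARACTER IS KEPT
# AS-IS, AND AN UPPER-CASE LETTER DIRECTLY FOLLOWING A LOWER-CASE ONE IS PRESERVED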
# VARIOUS HELP ROUTINES
def saveEntry (entry, dict) :
    if not len(entry) == 0 :
        key = entry['KEY']
        if key in dict :
            print 'Problem:',key,'is already in the dictionary!'
            print 'OLD:'
            print dict[key]
            print 'NEW:'
            print entry
            sys.exit(1)
        if not key == '' :
            dict[key] = entry
    return {'KEY':'', 'TOKEN':'', 'LABEL':'', 'ORIG':'', 'ALT':[], 'BT':[], 'NT':[], 'RT':[], 'D':[], 'REV':[], 'N':[], '=':[], 'HN':[], 'CN':[]}
def saveToken (entry, tok, lab, orig) :
    entry['KEY'] = keyify(tok)
    entry['TOKEN'] = tok
    entry['LABEL'] = lab
    entry['ORIG'] = orig
    return
def saveAlternate (entry, str) :
    list = entry['ALT']
    list.append(str.replace('"',''))
    return
def saveBroader (entry, str) :
    list = entry['BT']
    list.append(tokenize(str))
    return
def saveNarrower (entry, str) :
    list = entry['NT']
    list.append(tokenize(str))
    return
def saveRelated (entry, str) :
    list = entry['RT']
    list.append(tokenize(str))
    return
def saveDescription (entry, str) :
    list = entry['D']
    list.append(str)
    return
def saveUsage (entry, str) :
    list = entry['N']
    list.append(str)
    return
def saveHistory (entry, str) :
    list = entry['HN']
    list.append(str)
    return
def saveChange (entry, str) :
    list = entry['CN']
    list.append(str)
    return
def saveEquals (entry, str) :
    list = entry['=']
    list.append(str)
    return
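# EACH ENTRY RECORD HOLDS, PER TOKEN : THE SORT KEY, TOKEN, LABEL AND ORIGINAL LINE, PLUS LISTS
# FOR ALTERNATE LABELS (ALT/UF), BROADER/NARROWER/RELATED TERMS (BT/NT/RT, STORED AS TOKENS),
# DESCRIPTIONS (D), USAGE NOTES (N), HISTORY NOTES (HN), CHANGE NOTES (CN), OLD REVISIONS (REV)
# AND CROSS-REFERENCES TO OTHER VOCABULARIES ('=')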
# PRINTS HANDY LIST OF LETTER LINKS
def TOC(strm,cols,letter,label) :
    if cols > 0 and cols < NCOLS :
        strm.write('</TR>\n')
    strm.write('<TR><TD> </TD></TR>\n')
    strm.write('<TR>\n')
    strm.write('<TD BGCOLOR="black" COLSPAN="%s"><FONT COLOR="white">%s</FONT> \n' % (NCOLS,label))
    AA = 'A'
    aa = ord(AA)
    ZZ = 'Z'
    zz = ord(ZZ)
    while aa <= zz :
        if letter != '' and letter == AA :
            strm.write('<A NAME="%s"/><B><FONT SIZE="+2" COLOR="white">%s</FONT></B></A>\n' % (letter,letter))
        else :
            strm.write('<A HREF="#%s"><FONT COLOR="white">%s</FONT></A>\n' % (AA,AA))
        aa += 1
        AA = chr(aa)
    strm.write('</TD></TR>\n')
    strm.write('<TR><TD HEIGHT="15"/></TR>\n')
    cols = 0
    return
def TOC2(strm,letter) :
    strm.write('<TR>\n')
    strm.write('<TD BGCOLOR="black" COLSPAN="%s"> \n' % (MCOLS))
    AA = 'A'
    aa = ord(AA)
    ZZ = 'Z'
    zz = ord(ZZ)
    while aa <= zz :
        if letter != '' and letter == AA :
            strm.write('<FONT COLOR="white" SIZE="+2"><B>%s</B></FONT>\n' % (AA))
        else :
            strm.write('<A HREF="./%s.html"><FONT COLOR="white">%s</FONT></A>\n' % (AA,AA))
        aa += 1
        AA = chr(aa)
    strm.write('</TD></TR>\n')
    strm.write('<TR><TD HEIGHT="10"/></TR>\n')
    strm.write('<TR><TH>Description</TH><TH>alt</TH><TH>broader</TH><TH>narrower</TH><TH>related</TH></TR>')
    return
# PRINTS HTML HEADERS
def header (strm) :
    strm.write('<HTML>\n')
    strm.write('<HEAD>\n')
    strm.write('<TITLE>'+root+' Thesaurus</TITLE>\n')
    strm.write('<LINK HREF="./doc.css" TYPE="text/css" REL="stylesheet">\n')
    strm.write('</HEAD>\n')
    strm.write('<BODY BGCOLOR="white" TEXT="black" LINK="black">\n')
    strm.write('<FONT FACE="sans-serif">\n')
    strm.write('<TABLE>\n')
    return
# PRINTS HTML FOOT
def finis (strm) :
    strm.write('</TABLE>\n')
    strm.write('<HR>\n')
    strm.write('%s\n' % (time.ctime(time.time())))
    strm.write('</FONT>\n')
    strm.write('</BODY>\n')
    strm.write('</HTML>\n')
    strm.close()
    return
# PRINT OUT CORRECTED TEXT FILE ENTRIES
def newest () :
    nf = file('newest.txt','w')
    keys = dictionary.keys()
    keys.sort()
    for key in keys :
        entry = dictionary[key]
        label = entry['LABEL']
        token = entry['TOKEN'] # label.replace(' ','_')
        orig = entry['ORIG']
        alts = entry['ALT']
        ds = entry['D']
        bts = entry['BT']
        nts = entry['NT']
        rts = entry['RT']
        revs = entry['REV']
        ns = entry['N']
        hns = entry['HN']
        cns = entry['CN']
        eqs = entry['=']
        nf.write ('## %s\n' % (orig))
        nf.write ('%s\n' % (orig))
        if len(alts) > 0 :
            nf.write('%s\n' % ('\tALT'))
            alts.sort()
            for alt in alts :
                nf.write('\t\t"%s"\n' % (alt))
        if len(ds) > 0 :
            nf.write('%s\n' % ('\tD'))
            ds.sort()
            for d in ds :
                nf.write('\t\t%s\n' % (d))
        if len(bts) > 0 :
            nf.write('%s\n' % ('\tBT'))
            bts.sort()
            for bt in bts :
                e = dictionary[keyify(bt)]
                l = e['LABEL']
                nf.write('\t\t%s\n' % (l.replace(' ','_')))
        if len(nts) > 0 :
            nf.write('%s\n' % ('\tNT'))
            nts.sort()
            for nt in nts :
                e = dictionary[keyify(nt)]
                l = e['LABEL']
                nf.write('\t\t%s\n' % (l.replace(' ','_')))
        if len(rts) > 0 :
            nf.write('%s\n' % ('\tRT'))
            rts.sort()
            for rt in rts :
                e = dictionary[keyify(rt)]
                l = e['LABEL']
                nf.write('\t\t%s\n' % (l.replace(' ','_')))
        if len(revs) > 0 :
            nf.write('%s\n' % ('\tREV'))
            revs.sort()
            for rev in revs :
                nf.write('\t\t%s\n' % (rev))
        if len(hns) > 0 :
            nf.write('%s\n' % ('\tHN'))
            hns.sort()
            for hn in hns :
                nf.write('\t\t%s\n' % (hn))
        if len(cns) > 0 :
            nf.write('%s\n' % ('\tCN'))
            cns.sort()
            for cn in cns :
                nf.write('\t\t%s\n' % (cn))
        if len(ns) > 0 :
            nf.write('%s\n' % ('\tN'))
            ns.sort()
            for n in ns :
                nf.write('\t\t%s\n' % (n))
        if len(eqs) > 0 :
            nf.write('%s\n' % ('\t='))
            eqs.sort()
            for eq in eqs :
                nf.write('\t\t%s\n' % (eq))
    nf.close()
    return
# SIMPLE STATS FOR DICTIONARY
def stats(dict) :
    nbt = 0
    nnt = 0
    nrt = 0
    neq = 0
    ntop = 0
    for key in dict :
        entry = dict[key]
        if len(entry['BT']) > 0 :
            nbt += 1
        if len(entry['NT']) > 0 :
            nnt += 1
        if len(entry['RT']) > 0 :
            nrt += 1
        if len(entry['=']) > 0 :
            neq += 1
        if len(entry['NT']) > 0 and len(entry['BT']) == 0 :
            ntop += 1
    print 'Number of entries : ',len(dict)
    print 'Number of explicit narrower entries (with BTs) : ',nbt
    print 'Number of explicit broader entries (with NTs) : ',nnt
    print 'Number of entries with references (with RTs) : ',nrt
    print 'Number of entries with x-references (with =) : ',neq
    print 'Number of top entries (with NTs but no BTs) : ',ntop
    f = file('./NUMBER_OF_ENTRIES','w')
    f.write('%s' % len(dict))
    f.close()
    f = file('./NUMBER_OF_TOP_ENTRIES','w')
    f.write('%s' % (ntop))
    f.close()
    return


# ----- END OF FUNCTIONS


# ----- PARSE COMMAND LINE

i = 0
for arg in sys.argv :
    if i > 0 :
        if arg.startswith('--root=') :
            root = arg[7:].replace('"','')
            # print 'New root=',root
        elif arg.startswith('--path=') :
            path = arg[7:].replace('"','')
            # print 'New path=',path
        else :
            print 'Syntax:'
            print '\tpython thesaurus.py {--root=root-name} {--path=RDF-path}'
            print 'Current defaults:\n\troot=',root,'\n\tpath=',path
            sys.exit(1)
    i += 1
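# E.G. (ASSUMING A TERM LIST IAU93.txt IN THE CURRENT DIRECTORY) :
#     python thesaurus.py --root=IAU93 --path=http://www.example.org/rdf/
# WITHOUT ARGUMENTS, THE DEFAULTS root='IVOAT' AND THE RDF PATH DEFINED ABOVE ARE USED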

# ----- DEFINE DICTIONARIES AND LISTS

ulist = []      # LIST OF X U(F) Y REFERENCES
btlist = []     # LIST OF X BT Y REFERENCES
adict = {}      # DICTIONARY OF [alias,token] PAIRS
reflist = []    # LIST OF REFERENCED NAMESPACES
dictionary = {} # DICTIONARY OF COMPLETE ENTRIES
entry = {}      # CONSISTING OF : KEY, TOKEN, LABEL, DICTIONARY OF DESCRIPTIONS, ALIASES, BT'S, RT'S, NT'S

# ----- PARSE EACH LINE OF VOCABULARY FILE
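
# THE VOCABULARY FILE <root>.txt IS EXPECTED TO LOOK LIKE THE OUTPUT OF newest() ABOVE, E.G.
#
#     term_with_underscores_for_spaces
#     <TAB>ALT
#     <TAB><TAB>"alternate label"
#     <TAB>BT
#     <TAB><TAB>some_broader_term
#     <TAB>D
#     <TAB><TAB>A free-text description.
#
# LINES STARTING WITH '##' ARE IGNORED, OTHER '#' COMMENTS AND BLANK LINES ARE ECHOED TO STDOUT;
# RECOGNIZED TAGS ARE ALT/UF, U, BT, NT, RT, D, N, HN, CN, REV AND '='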

infile = file(root+'.txt','r')
type = ''
token = ''
current = ''
for row in infile :
    line = row.replace('\n','').replace('\r','')
    # print line

    # ----- ALMOST IGNORE BLANKS OR COMMENTS

    if line.startswith('##') : # IGNORE TOKEN LABEL COMMENTS - WILL BE PUT BACK ON IF newest() IS INVOKED
        type = ''

    elif line.startswith('#') or line.strip() == '' :
        print line

    # ----- CHECK FOR BT

    elif line.startswith('\tBT') :
        type = 'BT'

    # ----- CHECK FOR NT

    elif line.startswith('\tNT') :
        type = 'NT'

    # ----- CHECK FOR RT

    elif line.startswith('\tRT') :
        type = 'RT'

    # ----- CHECK FOR A(LT)

    elif line.startswith('\tA') :
        type = 'ALT'

    # ----- CHECK FOR UF

    elif line.startswith('\tUF') :
        type = 'UF'

    # ----- CHECK FOR U

    elif line.startswith('\tU') :
        type = 'U'

    # ----- CHECK FOR HISTORY NOTE

    elif line.startswith('\tHN') :
        type = 'HN'

    # ----- CHECK FOR CHANGE NOTE

    elif line.startswith('\tCN') :
        type = 'CN'

    # ----- CHECK FOR USAGE NOTE

    elif line.startswith('\tN') :
        type = 'N'

    # ----- CHECK FOR OLD REVISION

    elif line.startswith('\tREV') :
        type = 'REV'

    # ----- CHECK FOR DESCRIPTION

    elif line.startswith('\tD') :
        type = 'D'

    # ----- CHECK FOR CROSS-REFERENCES

    elif line.startswith('\t=') :
        type = '='

    # ----- MAIN TOKEN ENTRY

    elif not line.startswith('\t') :
        entry = saveEntry(entry,dictionary) # SAVE PREVIOUS ENTRY

        type = 'token'
        str = line.strip()
        token = tokenize(str)
        current = str.replace('_',' ')
        saveToken (entry,token,current,str)

    # ----- PROCESS CONTENT

    else :
        str = line.strip()
        # if str.endswith('s') :
        #     print str
        if type == 'BT' :
            saveBroader (entry,str)
            btlist.append([token,tokenize(str)]) # token BT str
        elif type == 'NT' :
            saveNarrower (entry,str)
            btlist.append([tokenize(str),token]) # str BT token
        elif type == 'RT' :
            saveRelated (entry,str)
        elif type == 'HN' :
            saveHistory (entry,str)
        elif type == 'CN' :
            saveChange (entry,str)
        elif type == 'N' :
            saveUsage (entry,str)
        elif type == 'REV' :
            entry['REV'].append(str)
        elif type == '=' :
            saveEquals (entry,str)
            partz = str.split(':')
            if len(partz) > 1 :
                reflist.append(partz[0])
        elif type == 'D' :
            saveDescription (entry,str)
        elif type == 'ALT' or type == 'UF' :
            saveAlternate (entry,str)
            adict[keyify(str)] = [str,token]
        elif type == 'U' : # token
            ulist.append(token)

entry = saveEntry(entry,dictionary)
keys = dictionary.keys() # LIST OF ALL KEYS
keys.sort()
stats(dictionary)

# ----- REMOVE ENTRIES WITH U REFERENCES (ASSUMING THEY ARE JUST ALIASES)

print 'Removing redundant alias entries...'
for token in ulist :
    key = keyify(token)
    print '\tdeleting',token
    if key in dictionary :
        del dictionary[key]
keys = dictionary.keys() # LIST OF ALL TOKENS
keys.sort()

# ----- CHECK TO SEE IF ALL THE NT'S AND BT'S ARE SET

print 'Checking BT/NT cross-references....'
for dBTp in btlist : # FOR EACH daughter BT parent PAIR, STORED AS [daughter,parent], CHECK THAT BOTH ENTRIES ARE PRESENT
    daughter = dBTp[0]
    parent = dBTp[1]

    # ----- CHECK BT'S

    key = keyify(daughter)
    if not key in keys :
        print 'Checking BT: CANNOT FIND',key,'IN DICTIONARY KEYS!'
        sys.exit(1)
    entry = dictionary[key]
    bts = entry['BT']
    if not parent in bts :
        print '\tBT ',parent,'missing in',daughter
        bts.append(parent)

    # ----- CHECK NT's

    key = keyify(parent)
    if not key in keys :
        print 'Checking NT: CANNOT FIND',key,'IN DICTIONARY KEYS!'
        sys.exit(1)
    entry = dictionary[key]
    nts = entry['NT']
    if not daughter in nts :
        print '\tNT ',daughter,'missing in',parent
        nts.append(daughter)

# ----- CHECK TO SEE IF ALL THE RT'S ARE SYMMETRIC

print 'Checking for complete RT linking....'
for key in keys :
    entry = dictionary[key]
    token = entry['TOKEN']
    rts = entry['RT']
    # FOR EACH TOKEN'S RT CONNECTION...
    for rt in rts :
        kee = keyify(rt)
        e = dictionary[kee]
        t = e['TOKEN']
        refs = e['RT']
        if not token in refs :
            print '\tadding',token,'to list of RTs for',t
            refs.append(token)
stats(dictionary)

# ---- CREATE HTML LIST OF TOKENS

print 'Creating list of tokens...'

f = file('tokens.html','w')
header(f)
TOC(f,0,'A',root+' tokens:')

letter = 'A'
a = ord(letter)
current = ''
started = False
col = 0

for key in keys :
    entry = dictionary[key]
    token = entry['TOKEN']
    c = token[0:1].upper()
    if c == letter :
        started = True
    while started and letter != c and letter != 'Z' :
        a += 1
        letter = chr(a)
        TOC(f,col,letter,root+' tokens:')
        col = 0
    if col == 0 :
        f.write('<TR>\n')
        col = 1
    f.write('<TD>\n')
    f.write('<A NAME="%s"/><A HREF="./dict/%s.html#%s">%s</A>\n' % (token,letter,token,token))
    f.write('</TD>\n')
    col += 1
    if col > NCOLS :
        f.write('</TR>\n')
        col = 0
finis(f)

# ---- HTML LIST OF LABELS

print 'Creating list of labels...'

f = file('labels.html','w')
header(f)
TOC(f,0,'A',root+' labels:')

letter = 'A'
a = ord(letter)
current = ''
started = False
col = 0

keys = dictionary.keys()
keys.sort()
for key in keys :
    entry = dictionary[key]
    token = entry['TOKEN']
    current = entry['LABEL']
    c = token[0:1].upper()
    if c == letter :
        started = True
    while started and letter != c and letter != 'Z' :
        a += 1
        letter = chr(a)
        TOC(f,col,letter,root+' labels:')
        col = 0
    if col == 0 :
        f.write('<TR>\n')
        col = 1
    f.write('<TD>\n')
    f.write('<A NAME="%s"/><A HREF="./dict/%s.html#%s">%s</A>\n' % (token,letter,token,current))
    f.write('</TD>\n')
    col += 1
    if col > NCOLS :
        f.write('</TR>\n')
        col = 0
finis(f)

# ----- DICTIONARY AND RDF FILE

print 'Creating dictionary and RDF file ...'

f = file(root+'.rdf','w')
f.write('<?xml version="1.0"?>\n')
f.write('<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"\n')
f.write('\txmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"\n')
f.write('\txmlns:dc="http://purl.org/dc/elements/1.1/"\n')
f.write('\txmlns:skos="http://www.w3.org/2004/02/skos/core#"\n')
# f.write('\txmlns:owl="http://www.w3.org/2002/07/owl#"\n')
if 'IAU93' in reflist :
    f.write('\txmlns:IAU93="%sIAU93#"\n' % (path))
if 'IVOAT' in reflist :
    f.write('\txmlns:IVOAT="%sIVOAT#"\n' % (path))
if 'UCD1' in reflist :
    f.write('\txmlns:UCD1="%sUCD1#"\n' % (path))
if 'math' in reflist :
    f.write('\txmlns:math="%smath#"\n' % (path))
f.write('\txml:base="%s" >\n' % (path+root))
f.write('\t<skos:ConceptScheme rdf:about="">\n')
f.write('\t\t<dc:title>'+root+' Thesaurus - Version '+time.ctime(time.time())+'</dc:title>\n')

for key in keys :
    entry = dictionary[key]
    token = entry['TOKEN']
    current = entry['LABEL']
    bts = entry['BT']
    nts = entry['NT']
    if len(nts) > 0 and len(bts) == 0 : # NARROWER TERMS KNOWN BUT NO BROADER
        f.write('\t\t<skos:hasTopConcept rdf:resource="#%s"/>\n' % (token))
f.write('\t</skos:ConceptScheme>\n')
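# THE RESULTING RDF HAS ROUGHLY THIS SHAPE (ONE skos:Concept PER ENTRY, WRITTEN IN THE LOOP BELOW,
# PLUS OPTIONAL historyNote/changeNote/scopeNote ELEMENTS; TOKEN AND LABEL NAMES ARE PLACEHOLDERS) :
#
#     <skos:ConceptScheme rdf:about="">
#         <dc:title>... Thesaurus - Version ...</dc:title>
#         <skos:hasTopConcept rdf:resource="#someTopToken"/>
#     </skos:ConceptScheme>
#     <skos:Concept rdf:about="#someToken">
#         <skos:inScheme rdf:resource=""/>
#         <skos:prefLabel>some label</skos:prefLabel>
#         <skos:definition>...</skos:definition>
#         <skos:altLabel>...</skos:altLabel>
#         <skos:broader rdf:resource="#broaderToken"/>
#         <skos:narrower rdf:resource="#narrowerToken"/>
#         <skos:related rdf:resource="#relatedToken"/>
#     </skos:Concept>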

dictfile = file('dict/A.html','w')
header(dictfile)
TOC2(dictfile,'A')

letter = 'A'
a = ord(letter)
current = ''
started = False
col = 0
n = 0
for key in keys :
    entry = dictionary[key]
    token = entry['TOKEN']
    current = entry['LABEL']

    c = current[0:1].upper()
    if c == letter :
        started = True
    while started and letter != c and letter != 'Z' :
        a += 1
        letter = chr(a)
        finis(dictfile)
        str = './dict/'+letter+'.html'
        dictfile = file(str,'w')
        header(dictfile)
        TOC2(dictfile,letter)


    # ----- WRITE TOKENS, LABELS, AND EQUIVALENTS

    dictfile.write('<TR><TD COLSPAN="%d" BGCOLOR="#C9C9C9"><A NAME="%s"/>\n' % (MCOLS,token))
    dictfile.write('<B>%s</B> (<TT>%s</TT>)\n' % (current,token))
    i = 0
    for str in entry['='] :
        if i == 0 :
            dictfile.write(' = ')
        if i > 0 :
            dictfile.write(', ')
        partz = str.split(':')
        if len(partz) > 1 :
            nspace = partz[0]
            tok = partz[1]
            c = tok[0:1].upper()
            dictfile.write('<A TARGET="_blank" HREF="%s/%s/dict/%s.html#%s">%s</A>' % (path,nspace,c,tok,str))
        else :
            dictfile.write('%s' % (str))
        i += 1
    dictfile.write('</TD></TR>\n')

    f.write('\t<skos:Concept rdf:about="#%s">\n' % (token))
    f.write('\t\t<skos:inScheme rdf:resource=""/>\n')
    f.write('\t\t<skos:prefLabel>%s</skos:prefLabel>\n' % (current))

    dictfile.write('<TR>')

    # ---- WRITE DESCRIPTIONS

    dictfile.write('<TD BGCOLOR="#E7E7E7">\n')
    if len(entry['D']) > 0 :
        for str in entry['D'] :
            dictfile.write('%s<BR>\n' % (str.replace('"','')))
            f.write('\t\t<skos:definition>%s</skos:definition>\n' % (str.replace('"','')))
    dictfile.write('</TD>\n')

    # ---- WRITE ALTLABELS

    dictfile.write('<TD BGCOLOR="#DDDDDD">\n')
    i = 0
    for str in entry['ALT'] :
        if i > 0 :
            dictfile.write(',<BR>')
        dictfile.write('%s\n' % (str))
        i += 1
        f.write('\t\t<skos:altLabel>%s</skos:altLabel>\n' % (str.replace('"','')))
    dictfile.write('</TD>\n')

    # ---- WRITE BT

    dictfile.write('<TD BGCOLOR="#CCCCCC">\n')
    i = 0
    for str in entry['BT'] :
        kee = keyify(str)
        if i > 0 :
            dictfile.write(',<BR>')
        btentry = dictionary[kee]
        btlabel = btentry['LABEL']
        btchar = btlabel[0:1].upper()
        dictfile.write('<A HREF="./%s.html#%s">%s</A>\n' % (btchar,str,btlabel))
        i += 1
        f.write('\t\t<skos:broader rdf:resource="#%s"/>\n' % (str))
    dictfile.write('</TD>\n')

    # ---- WRITE NT

    dictfile.write('<TD BGCOLOR="#BBBBBB">\n')
    i = 0
    for str in entry['NT'] :
        kee = keyify(str)
        if i > 0 :
            dictfile.write(',<BR>')
        ntentry = dictionary[kee]
        ntlabel = ntentry['LABEL']
        ntchar = ntlabel[0:1].upper()
        dictfile.write('<A HREF="./%s.html#%s">%s</A>\n' % (ntchar,str,ntlabel))
        i += 1
        f.write('\t\t<skos:narrower rdf:resource="#%s"/>\n' % (str))
    dictfile.write('</TD>\n')

    # ---- WRITE RT

    dictfile.write('<TD BGCOLOR="#AAAAAA">\n')
    i = 0
    for str in entry['RT'] :
        kee = keyify(str)
        if i > 0 :
            dictfile.write(',<BR>')
        rtentry = dictionary[kee]
        rtlabel = rtentry['LABEL']
        rtchar = rtlabel[0:1].upper()
        dictfile.write('<A HREF="./%s.html#%s">%s</A>\n' % (rtchar,str,rtlabel))
        i += 1
        f.write('\t\t<skos:related rdf:resource="#%s"/>\n' % (str))
    dictfile.write('</TD>\n')


    # ---- WRITE HISTORY NOTES

    if len(entry['HN']) > 0 :
        dictfile.write('<TR><TD COLSPAN="%d"><FONT SIZE="-1">' % (MCOLS))
        for str in entry['HN'] :
            dictfile.write('%s<BR>\n' % (str.replace('"','')))
            f.write('\t\t<skos:historyNote>%s</skos:historyNote>\n' % (str.replace('"','')))
        dictfile.write('</FONT></TD></TR>\n')

    # ---- WRITE CHANGE NOTES

    if len(entry['CN']) > 0 :
        dictfile.write('<TR><TD COLSPAN="%d"><FONT SIZE="-1">' % (MCOLS))
        for str in entry['CN'] :
            dictfile.write('%s<BR>\n' % (str.replace('"','')))
            f.write('\t\t<skos:changeNote>%s</skos:changeNote>\n' % (str.replace('"','')))
        dictfile.write('</FONT></TD></TR>\n')

    # ---- WRITE USAGE NOTES

    if len(entry['N']) > 0 :
        dictfile.write('<TR><TD COLSPAN="%d"><FONT SIZE="-1">' % (MCOLS))
        for str in entry['N'] :
            dictfile.write('%s<BR>\n' % (str.replace('"','')))
            f.write('\t\t<skos:scopeNote>%s</skos:scopeNote>\n' % (str.replace('"','')))
        dictfile.write('</FONT></TD></TR>\n')

    # ---- WRITE REVISIONS

    if len(entry['REV']) > 0 :
        dictfile.write('<TR><TD COLSPAN="%d"><FONT COLOR="red" SIZE="-1">Revisions: \n' % (MCOLS))
        for str in entry['REV'] :
            dictfile.write('%s<BR>\n' % (str.replace('"','')))
        dictfile.write('</FONT></TD></TR>\n')

    # ---- WRITE EQUIVALENTS TO RDF FILE

    for str in entry['='] :
        f.write('\t\t<skos:related rdf:resource="%s"/>\n' % (str))

    # ---- FINISH ENTRY

    dictfile.write('<TR><TD COLSPAN="%d" HEIGHT="20"> </TD></TR>' % (MCOLS))
    n += 1
    if n == 10 :
        TOC2(dictfile,letter)
        n = 0
    f.write('\t</skos:Concept>\n')

finis(dictfile)
f.write('</rdf:RDF>')
f.close()

# ---- HTML LIST OF ALIASES

f = file('aliases.html','w')
header(f)
TOC(f,0,'A',root+' aliases:')

letter = 'A'
a = ord(letter)
current = ''
started = False
col = 0

keys = adict.keys()
keys.sort()
for key in keys :
    list = adict[key]
    alias = list[0].replace('"','')
    token = list[1]
    c = key[0:1].upper()
    if c == letter :
        started = True
    while started and letter != c and letter != 'Z' :
        a += 1
        letter = chr(a)
        TOC(f,col,letter,root+' aliases:')
        col = 0
    if col == 0 :
        f.write('<TR>\n')
        col = 1
    f.write('<TD>\n')
    f.write('<A NAME="%s"/><A HREF="./dict/%s.html#%s">%s</A>\n' % (alias,letter,token,alias))
    f.write('</TD>\n')
    col += 1
    if col > NCOLS :
        f.write('</TR>\n')
        col = 0
finis(f)

newest()