-
Notifications
You must be signed in to change notification settings - Fork 0
/
workflow_teihdt.py
54 lines (35 loc) · 1.66 KB
/
workflow_teihdt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Annotation Workflow
- converts TEI master files to annotated TEI files
- annotation with HeidelTime
The final results are stored in a folder "teia".
Run this file directly.
NOTE: HeidelTime uses TreeTagger for the linguistic annotation of the corpus files. Before using this workflow,
set the path to the TreeTagger installation in the file config.props in the HeidelTime directory.
The standalone version of HeidelTime is used here.
@author: Ulrike Henny-Krahmer
@filename: workflow_teihdt.py
"""
############ Options ##############
# All machine-specific settings are collected here; adjust them to the local
# setup before running the workflow.

# directory containing the TEI master files (input of the workflow)
infolder = "/home/ulrike/Git/conha19/tei"
# directory where the annotation working files and results should go;
# the final results are stored in its subfolder "teia" (see module docstring)
outfolder = "/home/ulrike/Git/conha19/heideltime"
# language of the texts (tested for: FRENCH, SPANISH, ITALIAN, PORTUGUESE)
lang = "SPANISH"
# path to the HeidelTime standalone installation; the config.props file in
# this directory must contain the TreeTagger path (see module docstring)
heideltimePath = "/home/ulrike/Programme/heideltime-standalone-2.2.1"
import sys
import os
# use the following to add a path to syspath (if needed):
#sys.path.append(os.path.abspath("/home/ulrike/Git/"))
import prepare_tei
import use_heideltime
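# Workflow steps, listed in the order in which they are meant to be run.
# All steps are commented out here, so running the file as-is does nothing:
# uncomment the step(s) you want to execute.
# By default, it should be enough to change the options above and leave the calls themselves as they are.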
#prepare_tei.prepare("split-1", infolder, outfolder)
#use_heideltime.apply_ht(heideltimePath, os.path.join(outfolder, "txt"), os.path.join(outfolder, "hdt"), lang)
#use_heideltime.debug_ampersands(os.path.join(outfolder, "hdt"), os.path.join(outfolder, "anno_pre"))
#use_heideltime.wrap_body(os.path.join(outfolder, "anno_pre"), os.path.join(outfolder, "annotated_temp"))
#prepare_tei.prepare("merge-hdt", outfolder, os.path.join(outfolder, "teia"))