34 lines
874 B
Python
Executable file
34 lines
874 B
Python
Executable file
#!/usr/bin/env python3
|
|
# Finds keywords and exports them
|
|
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
|
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
|
#
|
|
from datareader import DataReader
|
|
from gnukeywords import GnuKeywords
|
|
from hmlttotext import HmltToText
|
|
from json import dump as jsondump
|
|
|
|
|
|
def main(args):
    """Extract keywords for every article and export them to ``tags.json``.

    Iterates over a ``DataReader`` created with ``datadir='./data/'``, which
    yields ``(articlename, htmlstr)`` pairs. Each ``htmlstr`` is passed
    through ``HmltToText`` (presumably HTML -> plain text; confirm against
    that module) and the result through ``GnuKeywords(lang='de')``. Whatever
    that call returns is stored under the article name and the whole mapping
    is dumped as JSON into ``tags.json`` (relative to the current working
    directory).

    Args:
        args: Command-line arguments (e.g. ``sys.argv``). Currently unused.

    Returns:
        int: Always ``0``.
    """
    # Initialise the helper objects.
    keywords = dict()
    datareader = DataReader(datadir='./data/')
    gnukeywords = GnuKeywords(lang='de')
    hmltToText = HmltToText()

    # Fill the dictionary: article name -> extracted keywords.
    for articlename, htmlstr in datareader:
        text = hmltToText(htmlstr)
        keywords[articlename] = gnukeywords(text)

    # Save the keywords. ensure_ascii=False writes non-ASCII characters
    # verbatim, so the encoding is pinned to UTF-8 here instead of relying
    # on the platform's locale default, which may be unable to encode them.
    with open('tags.json', 'w', encoding='utf-8') as f:
        jsondump(keywords, f, indent=4, ensure_ascii=False)

    return 0
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: run main() with the raw command-line arguments
    # and hand its return value to the interpreter as the exit status.
    import sys

    exit_status = main(sys.argv)
    sys.exit(exit_status)
|