gnu-keywords/main.py

35 lines
874 B
Python
Raw Permalink Normal View History

2022-05-18 18:51:42 +02:00
#!/usr/bin/env python3
# Find keywords and export them.
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
# SPDX-License-Identifier: GPL-3.0-or-later
#
from datareader import DataReader
from gnukeywords import GnuKeywords
from hmlttotext import HmltToText
from json import dump as jsondump
def main(args):
    """Collect keywords for every article and write them to ``tags.json``.

    Iterates over the ``(articlename, htmlstr)`` pairs produced by a
    ``DataReader`` created with ``datadir='./data/'``, passes each
    ``htmlstr`` through an ``HmltToText`` instance and the result through a
    ``GnuKeywords`` instance created with ``lang='de'``.  The value returned
    for each article is stored under its article name, and the complete
    mapping is dumped as indented JSON into ``tags.json`` in the current
    working directory.

    Args:
        args: Command-line arguments as passed by the caller
            (``sys.argv``); currently unused.

    Returns:
        Always ``0``; the caller uses it as the process exit status.
    """
    # Initialise the helper objects.
    datareader = DataReader(datadir='./data/')
    gnukeywords = GnuKeywords(lang='de')
    hmltToText = HmltToText()

    # Write the keywords of each article into the dictionary.
    keywords = {}
    for articlename, htmlstr in datareader:
        text = hmltToText(htmlstr)
        keywords[articlename] = gnukeywords(text)

    # Save the keywords.  ensure_ascii=False emits non-ASCII characters
    # verbatim, so the file encoding is pinned to UTF-8 rather than left to
    # the platform's locale default (which may be unable to encode them).
    with open('tags.json', 'w', encoding='utf-8') as f:
        jsondump(keywords, f, indent=4, ensure_ascii=False)
    return 0
if __name__ == '__main__':
    # Script entry point: run main() with the raw command line and use its
    # return value as the process exit status.
    import sys

    exit_status = main(sys.argv)
    raise SystemExit(exit_status)