#!/usr/bin/env python3
# find keywords and export them
# SPDX-FileCopyrightText: 2022 Beat Jäckle
# SPDX-License-Identifier: GPL-3.0-or-later
#
from datareader import DataReader
from gnukeywords import GnuKeywords
from hmlttotext import HmltToText
from json import dump as jsondump


def main(args) -> int:
    """Collect keywords for every article and write them to ``tags.json``.

    Iterates over a ``DataReader`` rooted at ``./data/``, which yields
    ``(articlename, htmlstr)`` pairs, passes each ``htmlstr`` through
    ``HmltToText`` and the result through ``GnuKeywords(lang='de')``, and
    stores the outcome under the article name. The resulting mapping is
    dumped as indented JSON into ``tags.json`` in the current working
    directory.

    Args:
        args: Command-line arguments (``sys.argv``). Currently unused; kept
            so the entry-point signature stays stable.

    Returns:
        Always ``0``, used as the process exit status.
    """
    # Initialize the helper objects (same construction order as before, in
    # case the constructors have side effects).
    datareader = DataReader(datadir='./data/')
    gnukeywords = GnuKeywords(lang='de')
    # NOTE(review): presumably converts HTML markup to plain text -- confirm
    # against the hmlttotext module.
    hmltToText = HmltToText()

    # Build the article-name -> keywords mapping.
    keywords = {
        articlename: gnukeywords(hmltToText(htmlstr))
        for articlename, htmlstr in datareader
    }

    # Save the keywords. ensure_ascii=False emits non-ASCII characters
    # verbatim, so the file encoding must be pinned to UTF-8; relying on the
    # locale default can fail or produce non-UTF-8 JSON on some platforms.
    with open('tags.json', 'w', encoding='utf-8') as f:
        jsondump(keywords, f, indent=4, ensure_ascii=False)
    return 0


if __name__ == '__main__':
    import sys
    sys.exit(main(sys.argv))