20 lines
446 B
Python
20 lines
446 B
Python
|
#
|
||
|
# Find keywords in articles and return them.
|
||
|
# SPDX-FileCopyrightText: 2022 Beat Jäckle <beat@git,jdmweb2.ch>
|
||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||
|
#
|
||
|
import spacy
|
||
|
|
||
|
|
||
|
class GnuKeywords:
    """Callable keyword finder for articles, configured for one language.

    NOTE: the actual extraction is not implemented yet. Calling an instance
    returns a fixed placeholder list regardless of the input it is given.
    """

    # Maps a language code to a model name.
    # NOTE(review): these look like spaCy pipeline package names; nothing in
    # this class reads the mapping yet -- confirm the intended use.
    modules = {
        'de': 'de_core_news_sm',
        'en': 'en_core_web_lg',
    }

    def __init__(self, lang):
        """Store the language this instance is configured for.

        Args:
            lang: Language identifier, kept unchanged on ``self.lang``.
                No validation is performed here.
                NOTE(review): it is unclear whether callers pass a language
                code that is a key of ``modules`` ('de', 'en') or a model
                name such as 'de_core_news_sm' -- confirm with callers.
        """
        self.lang = lang

    def __call__(self, htmlstr):
        """Return the keywords for the given article.

        Args:
            htmlstr: The article to analyse. Currently ignored.

        Returns:
            A newly created list of keyword strings. At present this is
            always the same hard-coded placeholder list.
        """
        # TODO: placeholder result; neither ``htmlstr`` nor ``self.lang`` is
        # consulted yet.
        return ['gnu', 'linux', 'for', 'ever', 'love']
|