You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2548 lines
130 KiB
2548 lines
130 KiB
import nltk
|
|
from nltk.stem.snowball import GermanStemmer
|
|
|
|
# import timeit
|
|
stemmer = GermanStemmer()
|
|
from nltk.corpus import stopwords
|
|
|
|
import numpy as np
|
|
import random
|
|
import os
|
|
import inspect
|
|
import tensorflow as tf
|
|
import tflearn
|
|
|
|
import re
|
|
import string
|
|
|
|
import fnmatch
|
|
import os
|
|
import re
|
|
import requests
|
|
from bs4 import BeautifulSoup
|
|
|
|
tf.compat.v1.reset_default_graph()
|
|
|
|
##print('Ich bin in knn_ae!!!!!!!')
|
|
|
|
import pickle
|
|
import json
|
|
|
|
import fnmatch
|
|
import os
|
|
import inspect
|
|
|
|
|
|
def getJsonPath():
    """Return the path of ``models/chat.json`` next to this source file.

    The directory is taken from the file of the currently executing frame;
    backslashes in the joined path are replaced by forward slashes.
    """
    module_dir = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe()))
    )
    return os.path.join(module_dir, 'models/chat.json').replace("\\", "/")
|
|
|
|
|
|
def getJsonPathEnt():
    """Return the path of ``models/chat.json`` next to this source file.

    NOTE(review): this resolves to exactly the same file as ``getJsonPath``;
    the name suggests a separate entity file may have been intended - confirm.
    """
    module_dir = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe()))
    )
    return os.path.join(module_dir, 'models/chat.json').replace("\\", "/")
|
|
|
|
|
|
######### load JSON ##################
|
|
# Read the chat definition once at import time; the file is decoded as UTF-8
# and the parsed JSON is kept in the module-level name ``dialogflow``.
with open(getJsonPath(), encoding='UTF-8') as json_data:
    dialogflow = json.load(json_data)
|
|
# ##print(dialogflow)
|
|
|
|
########## spelling correction
|
|
from spellchecker import SpellChecker
|
|
|
|
spell = SpellChecker(language='de')
|
|
|
|
spell.word_frequency.load_words \
|
|
(["feedback_positiv", '!', '2', 'ab', 'abend', 'abgeben', 'abkürzung', 'adresse', 'adresse_infoorte', 'aktiv', 'alles', 'alt', 'amt', 'an', 'and', 'anmelden', 'ansprechen', 'ansprechpartner', 'antworten', 'are', 'arsch', 'arschgesicht', 'arschloch', 'auf', 'ausfüllen', 'bald', 'beantragen', 'bearbeitungsdauer', 'bedeutet', 'behörde', 'bei', 'bekacktes', 'bekomme', 'bescheid', 'bestellen', 'bezahlen', 'bis', 'bitch', 'blödian', 'brauche', 'brauchen', 'braucht', 'bye', 'ciao', 'dafür', 'dahin', 'dank', 'danke', 'darf', 'das', 'datum', 'dauert', 'dein', 'deine', 'dich', 'dir', 'du', 'e-mail', 'einreichen', 'einsehen', 'email', 'erfüllen', 'erreichbar', 'erreichbarkeit', 'erreichen', 'erzähl', 'erzähle', 'euro', 'existiert', 'fahrerlaubnis', 'fertig', 'fick', 'finde', 'flachzange', 'formulare', 'fotze', 'fragen', 'fresse', 'frida', 'frist', 'fristen', 'fuck', 'führerschein', 'führerscheinantrag', 'führerscheinklasse', 'für', 'geben', 'geboren', 'geburtstag', 'gebühr', 'gehen', 'geholfen', 'geht', 'gehts', 'gelten', 'genehmigung', 'geprüft', 'gesetz', 'gesetzliche', 'gesichtselfmeter', 'geöffnet', 'gibst', 'gibt', 'gott', 'grund', 'grüss', 'grüß', 'gut', 'guten', 'götter', 'göttinnen', 'hallo', 'halt', 'hast', 'hat', 'heißt', 'hello', 'heute', 'hey', 'hi', 'hilfe', 'hilfreich', 'hinblättern', 'hingehen', 'hoch', 'how', 'hurensohn', 'hürden', 'iban', 'ich', 'idiot', 'ihnen', 'ihre', 'in', 'info', 'informationen', 'infos', 'innerhalb', 'internet', 'irgendwas', 'ist', 'jemand', 'jemanden', 'joke', 'kackwurst', 'kann', 'kannst', 'kennst', 'klar', 'kommen', 'kopie', 'kosten', 'kostenlos', 'kostet', 'kriege', 'lang', 'lange', 'leben', 'locker', 'loser', 'lustiges', 'läuft', 'machst', 'macht', 'mal', 'maul', 'mehr', 'melden', 'mitbringen', 'morgen', 'muss', 'münzt', 'müssen', 'nach', 'nachweis', 'name', 'nicht', 'off', 'offen', 'online', 'onlineformular', 'opfer', 'original', 'papiere', 'paragraphen', 'passiert', 'penner', 'personalausweis', 'persönlich', 'privatklage', 
'rathaus', 'rauszufinden', 'rechtlich', 'rechtsgrundlage', 'reicht', 'reisepass', 'rund', 'sagt', 'sbw_preamble', 'schauen', 'scheiße', 'schnell', 'schon', 'seelenloser', 'seelsorge', 'seit', 'selbstmord', 'servus', 'sex', 'sichtbar', 'sie', 'spreche', 'sprechen', 'spricht', 'spät', 'steht', 'stelle', 'stellt', 'sterben', 'suizid', 'tag', 'telefon', 'telefonnumer', 'telefonseelsorge', 'teuer', 'toll', 'tschüss', 'tun', 'uhr', 'uhrzeit', 'uhrzeiten', 'umbringen', 'ummelden', 'unterlagen', 'verabschiede', 'verfahren', 'verfahrensablauf', 'verordnung', 'viele', 'vielen', 'vollidiot', 'voraussetzungen', 'vorbeikommen', 'vorzeigen', 'wann', 'warten', 'warum', 'was', 'weit', 'weißt', 'welche', 'welches', 'wellche', 'wem', 'wen', 'wenden', 'wer', 'wichser', 'wie', 'wiedersehen', 'wieviel', 'wissen', 'witz', 'witze', 'wo', 'wochen', 'wofür', 'woher', 'you', 'zeig', 'zeit', 'zimmer', 'zu', 'zuständig', 'zuständige', 'öffnet', 'öffnungszeiten', '17', '18', 'a', 'ab', 'abgenommen', 'abhanden', 'alten', 'and', 'anhängerführerschein', 'auffindbar', 'ausland', 'ausländischer', 'b96', 'be', 'beantragen', 'befristet', 'befristeter', 'beförderung', 'begleitetes', 'begleitung', 'bekommen', 'bf', 'bf17', 'bus', 'c', 'ce', 'deutscher', 'einkassiert', 'entziehung', 'entzogen', 'entzug', 'erlaubnis', 'ersatz', 'ersatzführerschein', 'ersterteilung', 'erweitern', 'erweiterung', 'eu', 'eu-führerschein', 'fahren', 'fahrerlaubnis', 'fahrerlaubnisentzug', 'fahrgastbeförderung', 'finde', 'führerschein', 'führerscheinentzug', 'führerscheinerweiterung', 'führerscheinklasse', 'führerscheintausch', 'führerscheinumtausch', 'führerscheinverlust', 'führerscheinverlängerung', 'gefunden', 'gekommen', 'geändert', 'heirat', 'international', 'internationaler', 'jahren', 'jugendliche', 'karte', 'kartenführerschein', 'klasse', 'lappen', 'mehr', 'minderjährig', 'nachname', 'name', 'namen', 'namensänderung', 'neuer', 'neuerteilung', 'nicht', 'personen', 'personenbeförderung', 'pflichtumtausch', 'scheidung', 
'tauschen', 'taxi', 'taxischein', 'umtausch', 'umtauschen', 'verloren', 'verlust', 'verlängern', 'verlängerung', 'wechseln', 'weit', 'wiedererteilung', 'ändern', 'änderung', 'öpnv', 'ab', 'abgelaufen', 'abhanden', 'ablauf', 'ablaufen', 'abzumelden', 'adresse', 'adressen', 'afrika', 'amerika', 'and', 'anschrift', 'anschriftes', 'anschrifts', 'ansprechpartner', 'anzumelden', 'argentinien', 'asien', 'auflösen', 'ausgelaufen', 'aushändigung', 'auskunft', 'ausrauben', 'australien', 'außerhalb', 'bearbeitung', 'bearbeitungsdauer', 'bedingung', 'bedingungen', 'beendet', 'belgien', 'bescheid', 'bezahlen', 'brasilien', 'bulgarien', 'bürgerbüro', 'china', 'dauer', 'dauern', 'dauert', 'deutschland', 'doppelname', 'drillinge', 'dänemark', 'ehe', 'ehename', 'ehenamen', 'einsegnen', 'ersetzen', 'erstmalig', 'estland', 'eu', 'europa', 'europäische', 'fahrausweis', 'fahrberechtigung', 'fahrerlaubnis', 'fahrerlaubnisbehörde', 'familienname', 'fingerabdruck', 'finnland', 'format', 'frankreich', 'frist', 'fristen', 'futschikato', 'führerschein', 'führerscheinbehörde', 'führerscheinstelle', 'führerschien', 'geburtsname', 'gebühr', 'gebühren', 'geheiratet', 'geklaut', 'geschieden', 'gesetz', 'gesetze', 'gesetzliche', 'gestohlen', 'getraut', 'getrennt', 'gewechselt', 'geändert', 'griechenland', 'grund', 'heirat', 'heiraten', 'hinblätern', 'hochzeit', 'hürden', 'indonesien', 'irland', 'italien', 'japan', 'jugendlich', 'kanada', 'kaputt', 'kind', 'kinder', 'kleinkind', 'kopie', 'kosten', 'kostenlos', 'kostet', 'kroatien', 'kuba', 'land', 'lange', 'lebensgemeinschaft', 'lebenspartnerschaft', 'lettland', 'liegen', 'litauen', 'luxemburg', 'läuft', 'malta', 'marokko', 'mexiko', 'motorradführerschein', 'mädchenname', 'münzt', 'nachame', 'nachname', 'nachnamen', 'nachnamens', 'nachweis', 'name', 'namen', 'namenswechsel', 'namensänderung', 'neuseeland', 'nicht', 'nichtgültig', 'nichtmehrgültig', 'nichtmehrverheiratet', 'nichtmehrzusammen', 'nichtverheiratet', 'nichtzusammen', 'niederlande', 
'nordamerika', 'original', 'paragraph', 'paragraphen', 'polen', 'portugal', 'rechtlich', 'rechtsgrundlage', 'rechtsgrundlagev', 'rumänien', 'russland', 'scheiden', 'scheidung', 'schweden', 'schwinden', 'slowakei', 'slowenien', 'sohn', 'spanien', 'staaten', 'stehlen', 'stelle', 'südafrika', 'südamerika', 'teuer', 'thailand', 'tochter', 'trauen', 'trauung', 'trennen', 'tschechien', 'tunesien', 'turkei', 'ukraine', 'umzumelden', 'unauffindbar', 'ungarn', 'ungütlig', 'union', 'unterlagen', 'usa', 'veraltet', 'verehelichen', 'vereinigte', 'verfahren', 'verfahrens', 'verfahrensablauf', 'verfallen', 'verjährt', 'verlaufen', 'verlegt', 'verlieren', 'verloben', 'verlobt', 'verloren', 'verlust', 'verlängern', 'verläuft', 'vermisst', 'vermählen', 'verordnung', 'verordnungen', 'verschollen', 'verstorben', 'versunken', 'vietnam', 'voraussetzung', 'voraussetzungen', 'vorschrift', 'vorschriften', 'warten', 'wechseln', 'wegkommen', 'weit', 'wenden', 'wiederholt', 'wohnungsadresse', 'zahlen', 'zuständig', 'zuständige', 'zuständigkeit', 'zwillinge', 'zypern', 'ägypten', 'ändern', 'änderung', 'österreich', 'ablauf', 'and', 'anrufen', 'ansprechpartner', 'ausfüllen', 'aushändigung', 'auskunft', 'bearbeitung', 'bearbeitungsdauer', 'bedingung', 'bedingungen', 'behörde', 'belegen', 'bescheid', 'bezahlen', 'dauer', 'dauern', 'dauert', 'einsehen', 'erfüllen', 'erfüllt', 'fingerabdruck', 'format', 'formulare', 'frist', 'fristen', 'funktion', 'funktionen', 'gebühr', 'gebühren', 'gesetz', 'gesetze', 'gesetzliche', 'grund', 'hinblätern', 'hürden', 'kollege', 'kopie', 'kosten', 'kostenlos', 'kostet', 'kriterien', 'mitbringen', 'münzt', 'nachweis', 'nicht', 'online', 'original', 'paragraph', 'paragraphen', 'persönlich', 'rechtlich', 'rechtsgrundlage', 'rechtsgrundlagev', 'spätestens', 'teuer', 'unterlagen', 'verfahren', 'verfahrens', 'verfahrensablauf', 'verlaufen', 'verläuft', 'verordnung', 'verordnungen', 'voraussetzung', 'voraussetzungen', 'vorschrift', 'vorschriften', 'vorzeigen', 'weit', 
'wenden', 'zahlen', 'zeit', 'zeitpunkt', 'zuständig', 'zuständigkeit', 'sbw_oeffnungszeiten_6008813', 'sbw_oeffnungszeiten_348', 'sbw_oeffnungszeiten_6004851', 'sbw_oeffnungszeiten_914', 'sbw_oeffnungszeiten_6004853', 'sbw_oeffnungszeiten_6004855', 'sbw_oeffnungszeiten_6004849', 'sbw_oeffnungszeiten_6004859', 'sbw_oeffnungszeiten_291', 'sbw_oeffnungszeiten_544', 'sbw_oeffnungszeiten_6004857', 'sbw_bearbeitungsdauer_6008813', 'sbw_vertiefendeinformationen_6008813', 'sbw_kosten_6008813', 'sbw_bezugsort_6008813', 'sbw_fristen_6008813', 'sbw_voraussetzungen_6008813', 'sbw_kosten_348', 'sbw_bearbeitungsdauer_348', 'sbw_vertiefendeinformationen_348', 'sbw_sonstiges_348', 'sbw_rechtsgrundlage_348', 'sbw_freigabevermerk_348', 'sbw_formulare_348', 'sbw_prozesse_348', 'sbw_organisationseinheiten_348', 'sbw_verfahrensablauf_6004851', 'sbw_fristen_6004851', 'sbw_unterlagen_6004851', 'sbw_kosten_6004851', 'sbw_bearbeitungsdauer_6004851', 'sbw_vertiefendeinformationen_6004851', 'sbw_sonstiges_6004851', 'sbw_rechtsgrundlage_6004851', 'sbw_freigabevermerk_6004851', 'sbw_zustaendigkeit_6008813', 'sbw_unterlagen_6008813', 'sbw_sonstiges_6008813', 'sbw_voraussetzungen_348', 'sbw_zustaendigkeit_348', 'sbw_bezugsort_348', 'sbw_verfahrensablauf_348', 'sbw_fristen_348', 'sbw_unterlagen_348', 'sbw_preamble_6004851', 'sbw_voraussetzungen_6004851', 'sbw_zustaendigkeit_6004851', 'sbw_bezugsort_6004851', 'sbw_rechtsgrundlage_6008813', 'sbw_freigabevermerk_6008813', 'sbw_preamble_348', 'sbw_organisationseinheiten_6004851', 'sbw_preamble_914', 'sbw_voraussetzungen_914', 'sbw_zustaendigkeit_914', 'sbw_bezugsort_914', 'sbw_fristen_914', 'sbw_verfahrensablauf_914', 'sbw_unterlagen_914', 'sbw_kosten_914', 'sbw_bearbeitungsdauer_914', 'sbw_vertiefendeinformationen_914', 'sbw_sonstiges_914', 'sbw_rechtsgrundlage_914', 'sbw_freigabevermerk_914', 'sbw_formulare_914', 'sbw_prozesse_914', 'sbw_organisationseinheiten_914', 'sbw_formulare_6004851', 'sbw_prozesse_6004851', 'sbw_preamble_6008813', 
'sbw_verfahrensablauf_6008813', 'sbw_preamble_6004857', 'sbw_voraussetzungen_6004857', 'sbw_zustaendigkeit_6004857', 'sbw_bezugsort_6004857', 'sbw_verfahrensablauf_6004857', 'sbw_fristen_6004857', 'sbw_unterlagen_6004857', 'sbw_kosten_6004857', 'sbw_bearbeitungsdauer_6004857', 'sbw_vertiefendeinformationen_6004857', 'sbw_sonstiges_6004857', 'sbw_preamble_6004859', 'sbw_voraussetzungen_6004859', 'sbw_zustaendigkeit_6004859', 'sbw_bezugsort_6004859', 'sbw_verfahrensablauf_6004859', 'sbw_fristen_6004859', 'sbw_unterlagen_6004859', 'sbw_kosten_6004859', 'sbw_bearbeitungsdauer_6004859', 'sbw_vertiefendeinformationen_6004859', 'sbw_sonstiges_6004859', 'sbw_rechtsgrundlage_6004859', 'sbw_freigabevermerk_6004859', 'sbw_formulare_6004859', 'sbw_prozesse_6004859', 'sbw_organisationseinheiten_6004859', 'sbw_preamble_291', 'sbw_voraussetzungen_291', 'sbw_zustaendigkeit_291', 'sbw_bezugsort_291', 'sbw_verfahrensablauf_291', 'sbw_fristen_291', 'sbw_unterlagen_291', 'sbw_kosten_291', 'sbw_bearbeitungsdauer_291', 'sbw_vertiefendeinformationen_291', 'sbw_sonstiges_291', 'sbw_rechtsgrundlage_291', 'sbw_freigabevermerk_291', 'sbw_formulare_291', 'sbw_prozesse_291', 'sbw_organisationseinheiten_291', 'sbw_freigabevermerk_6004857', 'sbw_rechtsgrundlage_6004857', 'sbw_formulare_6004857', 'sbw_prozesse_6004857', 'sbw_organisationseinheiten_6004857', 'sbw_preamble_544', 'sbw_voraussetzungen_544', 'sbw_zustaendigkeit_544', 'sbw_bezugsort_544', 'sbw_verfahrensablauf_544', 'sbw_fristen_544', 'sbw_unterlagen_544', 'sbw_kosten_544', 'sbw_bearbeitungsdauer_544', 'sbw_vertiefendeinformationen_544', 'sbw_sonstiges_544', 'sbw_rechtsgrundlage_544', 'sbw_freigabevermerk_544', 'sbw_formulare_544', 'sbw_prozesse_544', 'sbw_organisationseinheiten_544', 'sbw_preamble_6004853', 'sbw_voraussetzungen_6004853', 'sbw_zustaendigkeit_6004853', 'sbw_bezugsort_6004853', 'sbw_verfahrensablauf_6004853', 'sbw_fristen_6004853', 'sbw_unterlagen_6004853', 'sbw_kosten_6004853', 'sbw_bearbeitungsdauer_6004853', 
'sbw_vertiefendeinformationen_6004853', 'sbw_sonstiges_6004853', 'sbw_rechtsgrundlage_6004853', 'sbw_freigabevermerk_6004853', 'sbw_formulare_6004853', 'sbw_prozesse_6004853', 'sbw_organisationseinheiten_6004853', 'sbw_preamble_6004855', 'sbw_voraussetzungen_6004855', 'sbw_zustaendigkeit_6004855', 'sbw_bezugsort_6004855', 'sbw_verfahrensablauf_6004855', 'sbw_fristen_6004855', 'sbw_unterlagen_6004855', 'sbw_kosten_6004855', 'sbw_bearbeitungsdauer_6004855', 'sbw_vertiefendeinformationen_6004855', 'sbw_sonstiges_6004855', 'sbw_rechtsgrundlage_6004855', 'sbw_freigabevermerk_6004855', 'sbw_preamble_6004849', 'sbw_voraussetzungen_6004849', 'sbw_zustaendigkeit_6004849', 'sbw_bezugsort_6004849', 'sbw_verfahrensablauf_6004849', 'sbw_fristen_6004849', 'sbw_unterlagen_6004849', 'sbw_kosten_6004849', 'sbw_bearbeitungsdauer_6004849', 'sbw_vertiefendeinformationen_6004849', 'sbw_sonstiges_6004849', 'sbw_rechtsgrundlage_6004849', 'sbw_freigabevermerk_6004849', 'sbw_formulare_6004849', 'sbw_prozesse_6004849', 'sbw_organisationseinheiten_6004849'
|
|
])
|
|
###################################################
|
|
import numpy as np
|
|
|
|
import random
|
|
import os
|
|
import inspect
|
|
|
|
|
|
def getPath(file):
    """Return *file* joined onto the directory of this source file.

    Args:
        file: Path relative to the directory containing this module.

    Returns:
        The joined path with every backslash replaced by a forward slash.
    """
    module_dir = os.path.dirname(
        os.path.abspath(inspect.getfile(inspect.currentframe()))
    )
    return os.path.join(module_dir, file).replace("\\", "/")
|
|
|
|
|
|
from nltk.tokenize import sent_tokenize
|
|
|
|
#from summarize import summarize
|
|
# Veranstaltungskalender
|
|
##############################################
|
|
from datetime import timedelta
|
|
from datetime import date
|
|
from datetime import datetime
|
|
|
|
|
|
def next_weekday(d, weekday):
    """Return the next date strictly after *d* that falls on *weekday*.

    Args:
        d: Reference ``date`` (or ``datetime``).
        weekday: Target weekday, 0 = Monday ... 6 = Sunday.

    Returns:
        *d* shifted forward by 1 to 7 days.
    """
    days_ahead = weekday - d.weekday()
    if days_ahead <= 0:  # target day already happened this week
        days_ahead += 7
    return d + timedelta(days_ahead)
|
|
|
|
|
|
def past_weekday(d, weekday):
    """Return the date of *weekday* in the calendar week before the week of *d*.

    Weeks start on Monday. Previously a target weekday lying later in the
    week than *d* produced a date in the future; the result is now always in
    the previous week. Results for targets on or before *d*'s weekday are
    unchanged.

    Args:
        d: Reference ``date`` (or ``datetime``).
        weekday: Target weekday, 0 = Monday ... 6 = Sunday.

    Returns:
        *d* shifted back by 1 to 13 days.
    """
    # Offset to the target weekday inside d's own week, then one week back.
    days_back = weekday - d.weekday() - 7
    return d + timedelta(days_back)
|
|
|
|
|
|
def sum_kata(text):
    """Return the first ``<p>`` element of an HTML snippet as a string.

    Two boilerplate paragraphs (the "Bitte beachten Sie:" notice and a
    dashed separator line) are removed before parsing. Every hyphen is
    stripped from the returned markup.

    Args:
        text: HTML fragment as a string.

    Returns:
        The first paragraph including its ``<p>`` tags with all ``-``
        characters removed, or ``''`` when the fragment has no paragraph.
    """
    for boilerplate in (
        '<p>Bitte beachten Sie:</p>',
        '<p>----------------------------------------------------------------------------------------------</p>',
    ):
        text = text.replace(boilerplate, '')

    first_paragraph = BeautifulSoup(text, "html.parser").find('p')
    if first_paragraph is None:
        return ''
    # The hyphen removal also hits hyphens inside words and attributes;
    # kept as in the original implementation.
    return re.sub(r"[-]", "", str(first_paragraph))
|
|
|
|
|
|
#################Wetter########
|
|
|
|
# Building blocks for recognising German date expressions. They are
# concatenated into the compiled patterns reg1 ... reg10 below. Patterns that
# contain regex escapes are raw strings so that ``\s`` / ``\d`` are not
# treated as (invalid) string escapes.

# NOTE(review): 'sechszehn', 'dreizig' and 'sechszig' are not the standard
# spellings (sechzehn, dreißig, sechzig) - confirm whether this is intended.
numbers = r"(^a(?=\s)|eins|eine|einen|einer|einem|zwei|drei|vier|fünf|sechs|sieben|acht|neun|zehn|elf|zwölf|dreizehn|vierzehn|fünfzehn|sechszehn|siebzehn|achtzehn|neunzehn|zwanzig|dreizig|vierzig|fünfzig|sechszig|siebzig|achtzig|neunzig|hundert|tausend|1|2|3|4|5|6|7|8|9)"
day = "(montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag)"
week_day = "(montag|dienstag|mittwoch|donnerstag|freitag|samstag|sonntag)"
# NOTE(review): 'may|june|july' are English; the German names mai/juni/juli
# are not matched. Adding them without word boundaries would also match
# inside words such as "email" - left unchanged, confirm intent.
month = "(januar|februar|märz|april|may|june|july|august|september|oktober|november|dezember)"
dmy = "(jahr|tag|woche|monat)"
rel_day = "(heute|gestern|morgen|übermorgen)"
# The trailing empty alternative lets the preposition be omitted.
exp1 = "(vor|nach|in|im|später|)"
exp2 = "(diese|dieses|dieser|diesen|diesem|nächste|nächstes|nächster|nächstem|nächsten|letzte|letztes|letzter|letztem|letzten)"
# Matches only "<digits>[/.]<digits>"; a following year part is not captured.
iso = r"\d+[/.]\d+"
iso2 = r"\d+[/.]"
year = r"((?<=\s)\d{4}|^\d{4})"

# "<preposition> <number> <unit>", e.g. "in 2 tag".
regxp1 = r"((\d+|(" + exp1 + r"[-\s]?)+) " + numbers + "n? " + dmy + ")"

# "<diese/nächste/letzte...> <unit | weekday | month>".
regxp2 = "(" + exp2 + " (" + dmy + "|" + week_day + "|" + month + "))"
# "<digits>. <month>".
regxp8 = "(" + iso2 + " (" + month + "))"

date1 = "([012]?[0-9]|3[01])"
# "<day-of-month> <month>".
regxp6 = "(" + date1 + " " + month + ")"

reg1 = re.compile(regxp1, re.IGNORECASE)
reg2 = re.compile(regxp2, re.IGNORECASE)
reg3 = re.compile(rel_day, re.IGNORECASE)
reg4 = re.compile(iso)
# reg5 / reg9 are case-sensitive: only these two spellings are recognised.
reg5 = re.compile("wochenende")
reg9 = re.compile("Wochenende")
reg6 = re.compile(regxp6, re.IGNORECASE)
reg7 = re.compile(day, re.IGNORECASE)
reg8 = re.compile(regxp8, re.IGNORECASE)
reg10 = re.compile(month, re.IGNORECASE)
|
|
|
|
|
|
# Lower-case weekday names; the position equals ``date.weekday()``.
_WEEKDAY_NAMES = (
    'montag', 'dienstag', 'mittwoch', 'donnerstag',
    'freitag', 'samstag', 'sonntag',
)

# Number words and their digits. Longer words come before words they
# contain ("einen" before "ein", "dreizehn" before "drei"/"zehn") because
# the replacement is a plain substring replacement.
_NUMBER_WORDS = (
    ('dreizehn', '13'), ('vierzehn', '14'), ('fünfzehn', '15'),
    ('sechszehn', '16'), ('sechzehn', '16'), ('siebzehn', '17'),
    ('achtzehn', '18'), ('neunzehn', '19'), ('zwanzig', '20'),
    ('zwölf', '12'), ('elf', '11'),
    ('einen', '1'), ('einem', '1'), ('einer', '1'), ('eine', '1'),
    ('eins', '1'), ('ein', '1'),
    ('zwei', '2'), ('drei', '3'), ('vier', '4'), ('fünf', '5'),
    ('sechs', '6'), ('sieben', '7'), ('acht', '8'), ('neun', '9'),
    ('zehn', '10'),
)

# Prepositions after which no extra "in " is put in front of an offset.
_OFFSET_PREFIXES = ('vor ', 'in ', 'im ', 'später ')


def _format_day(day_value):
    """Format a ``date``/``datetime`` as ``DD.MM.YYYY``."""
    return day_value.strftime('%d.%m.%Y')


def _parse_to_str(expression, **parse_kwargs):
    """Parse *expression* with dateparser; return ``DD.MM.YYYY`` or ``None``."""
    parsed = dateparser.parse(expression, **parse_kwargs)
    return None if parsed is None else _format_day(parsed)


def _month_end(day_value):
    """Return *day_value* moved to the last day of its month."""
    import calendar

    last_day = calendar.monthrange(day_value.year, day_value.month)[1]
    return day_value.replace(day=last_day)


def _weekday_index(lowered):
    """Return the index of the first weekday name contained in *lowered*."""
    for index, name in enumerate(_WEEKDAY_NAMES):
        if name in lowered:
            return index
    return None


def _weekend_range(today):
    """Return (start, end) strings of the current or upcoming weekend."""
    weekday_no = today.weekday()
    if weekday_no < 5:
        return (_format_day(next_weekday(today, 5)),
                _format_day(next_weekday(today, 6)))
    if weekday_no == 5:
        return _format_day(today), _format_day(next_weekday(today, 6))
    return _format_day(today), _format_day(today)


def _qualified_range(timex, today):
    """Resolve a "nächste/diese/letzte <unit>" phrase to (start, end) strings.

    Combinations without a rule (for example "letzten montag" or
    "dieses jahr") resolve to (today, today).
    """
    lowered = timex.lower()
    start = end = today
    weekday_index = _weekday_index(lowered)

    if 'nächste' in lowered:
        # Weekday names are checked first: most of them contain "tag".
        if weekday_index is not None:
            start = end = next_weekday(today, weekday_index)
        elif 'woche' in lowered:
            start = next_weekday(today, 0)
            end = start + timedelta(days=6)
        elif 'monat' in lowered:
            parsed = dateparser.parse('nächsten Monat')
            if parsed is not None:
                start = parsed.replace(day=1)
                end = _month_end(parsed)
        elif 'tag' in lowered:
            end = today + timedelta(days=7)
    elif 'diese' in lowered:
        if weekday_index is not None:
            start = end = next_weekday(today, weekday_index)
        elif 'woche' in lowered:
            end = next_weekday(today, 6)
        elif 'monat' in lowered:
            parsed = dateparser.parse('diesen Monat')
            if parsed is not None:
                start = parsed
                end = _month_end(parsed)
    elif 'letzte' in lowered:
        if 'woche' in lowered:
            start = past_weekday(today, 0)
            end = start + timedelta(days=6)

    return _format_day(start), _format_day(end)


def extractDate(text):
    """Extract date ranges from a German sentence.

    The text is scanned with the module-level patterns ``reg1`` ... ``reg10``
    in a fixed order. Every recognised expression contributes a start and an
    end date, both formatted ``DD.MM.YYYY``. Expressions dateparser cannot
    parse are skipped.

    ``dateparser`` is not imported in the visible part of this file; it is
    assumed to be imported at module level elsewhere - confirm.

    Args:
        text: Sentence to scan.

    Returns:
        A flat list ``[start, end, start, end, ...]`` with at least one pair;
        when nothing is recognised the single pair is (today, today).
    """
    today = date.today()
    today_str = _format_day(today)
    timex_found = []

    # Offsets such as "in 2 tagen": the range runs from today to the target.
    for groups in reg1.findall(text):
        phrase = groups[0].lower()
        for word, digits in _NUMBER_WORDS:
            phrase = phrase.replace(word, digits)
        phrase = phrase.replace('nach', 'in').strip()
        # The preposition is optional in the pattern; supply "in" only when
        # the phrase does not already start with one.
        if not phrase.startswith(_OFFSET_PREFIXES):
            phrase = 'in ' + phrase
        target = _parse_to_str(phrase)
        if target is not None:
            timex_found.extend((today_str, target))

    # "<digits>. <month>"
    for groups in reg8.findall(text):
        parsed = _parse_to_str(groups[0])
        if parsed is not None:
            timex_found.extend((parsed, parsed))

    # "nächste woche", "diesen montag", "letzte woche", ...
    for groups in reg2.findall(text):
        timex_found.extend(_qualified_range(groups[0], today))

    # A weekend mention discards everything collected so far.
    if reg5.search(text) or reg9.search(text):
        timex_found = list(_weekend_range(today))

    # heute / gestern / morgen / übermorgen
    for timex in reg3.findall(text):
        if 'übermorgen' in timex.lower():
            timex_found.extend(
                (today_str, _format_day(today + timedelta(days=2)))
            )
            continue
        parsed = _parse_to_str(timex)
        if parsed is not None:
            timex_found.extend((parsed, parsed))

    # Numeric dates such as "12.05"
    for timex in reg4.findall(text):
        parsed = _parse_to_str(timex)
        if parsed is not None:
            timex_found.extend((parsed, parsed))

    # "<day-of-month> <month>"
    for groups in reg6.findall(text):
        parsed = _parse_to_str(groups[0])
        if parsed is not None:
            timex_found.extend((parsed, parsed))

    # A bare weekday name counts only when nothing else was found.
    for timex in reg7.findall(text):
        if timex_found:
            break
        weekday_index = _weekday_index(timex.lower())
        if weekday_index is not None:
            target = _format_day(next_weekday(today, weekday_index))
            timex_found.extend((target, target))

    # A bare month name counts only when nothing else was found; the range
    # covers the whole month.
    for timex in reg10.findall(text):
        if timex_found:
            break
        first = _parse_to_str(timex, settings={'PREFER_DAY_OF_MONTH': 'first'})
        last = _parse_to_str(timex, settings={'PREFER_DAY_OF_MONTH': 'last'})
        if first is not None and last is not None:
            timex_found.extend((first, last))

    if not timex_found:
        timex_found.extend((today_str, today_str))

    return timex_found
|
|
|
|
|
|
from geotext import GeoText
|
|
|
|
|
|
def show_ents(doc):
    """Return the first place name GeoText finds in *doc*.

    The literal word 'Wetter' is stripped before the lookup. Cities take
    precedence over countries; when neither is found the default
    'Freiburg' is returned.

    Args:
        doc: The user question as a string.

    Returns:
        str: The first detected city, else the first detected country,
        else 'Freiburg'.
    """
    print('docanfang', doc)
    cleaned = doc.replace('Wetter', '') if "Wetter" in doc else doc
    places = GeoText(cleaned)
    print('doc', places)

    cities = places.cities
    if cities:
        return cities[0]

    countries = places.countries
    return countries[0] if countries else 'Freiburg'
|
|
|
|
|
|
def named_entity_wetter(frage):
    """Build the weather-service request for a user question.

    The location comes from ``show_ents`` and the date range from
    ``extractDate``. The end of the requested range must lie between today
    and today + 5 days; otherwise the range is replaced by the one for
    'nächste 4 Tage' and a hint text is returned alongside the request.

    Args:
        frage: The user question as a string.

    Returns:
        dict: ``{'service', 'location', 'zeit'}`` when the requested range
        is within the forecast window.
        tuple: ``(hint_text, dict)`` when the requested range is outside
        the forecast window. Callers must handle both shapes.
    """
    request = {
        'service': 'weather',
        'location': [show_ents(frage)],
    }

    # Extract the range once and reuse it below instead of re-running the
    # whole date extraction for the same question.
    date_range = extractDate(frage)
    requested_end = datetime.strptime(date_range[1], '%d.%m.%Y').date()

    today = dateparser.parse('heute').date()
    forecast_limit = today + timedelta(days=5)

    if requested_end > forecast_limit or requested_end < today:
        anzeige = 'Ich kann das Wetter der nächsten 5 Tage vorhersagen, danach wird die Vorhersage zu ungenau 🌈 🌦️'
        request['zeit'] = extractDate('nächste 4 Tage')
        return anzeige, request

    request['zeit'] = date_range
    return request
|
|
|
|
|
|
###################################################
|
|
##########Schreibfehler Korrektur
|
|
# from spellchecker import SpellChecker
|
|
|
|
# spell = SpellChecker(language='de')
|
|
|
|
|
|
################################################
|
|
|
|
|
|
import pickle
|
|
import json
|
|
|
|
# Restore the data structures persisted next to the trained model.
# NOTE(security): pickle.load executes code embedded in the file; only load
# artifacts that were produced locally by a trusted process.
with open("prodae/models/trained_data", "rb") as trained_data_file:
    data = pickle.load(trained_data_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

# Build the neural network: input sized to one training vector, two hidden
# layers of 88 units, softmax output sized to one label vector.
net = tflearn.input_data(shape=[None, len(train_x[0])])
net = tflearn.fully_connected(net, 88)
net = tflearn.fully_connected(net, 88)
net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')
net = tflearn.regression(net)

# Define the model and configure the tensorboard log directory.
model = tflearn.DNN(net, tensorboard_dir='train_logs')
|
|
|
|
|
|
# importiere die Dialogdesign-Datei
|
|
|
|
|
|
def lowercase(obj):
    """Recursively lower-case and stem every string inside *obj*.

    Strings are lower-cased and stemmed; a string containing a space is
    tokenised first and its stemmed tokens are re-joined with single spaces.
    Dict values (not keys) and the members of lists, sets and tuples are
    processed recursively, keeping the container type. Any other object is
    returned unchanged.
    """
    if isinstance(obj, str):
        if " " not in obj:
            return stemmer.stem(obj.lower())
        tokens = nltk.word_tokenize(obj)
        return ' '.join([stemmer.stem(token.lower()) for token in tokens])

    if isinstance(obj, dict):
        return {key: lowercase(value) for key, value in obj.items()}

    if isinstance(obj, (list, set, tuple)):
        container = type(obj)
        return container(lowercase(member) for member in obj)

    return obj
|
|
|
|
|
|
def containsAll(str, set):
    """Return True when every item of *set* is contained in *str*.

    Despite the parameter names, both arguments may be any container or
    iterable: *str* must support ``in`` and *set* must be iterable. An empty
    *set* yields True.
    """
    # all() stops at the first missing item and avoids building a list.
    return all(item in str for item in set)
|
|
|
|
|
|
import pickle
|
|
|
|
def _load_entity_table(path):
    """Unpickle and return the object stored in the binary file *path*."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Entity lookup tables used by the matching functions in this module.
org_entiti = _load_entity_table("prodae/models/org_data")
hilf_entiti = _load_entity_table("prodae/models/hilfe_data")
leistung_entiti = _load_entity_table("prodae/models/leistung_data")
lebenslage_entiti = _load_entity_table("prodae/models/lebenslage_data")
|
|
|
|
|
|
# print('stadtteile_entiti', stadtteile_entiti)
|
|
|
|
def hilf_entity(frage):
    """Normalise a question and replace known help phrases by their entity key.

    The question is tokenised, lower-cased, spell-corrected and stemmed. Stop
    words are dropped unless they are on the keep list. For every synonym in
    ``hilf_entiti`` whose tokens all occur in the token list, the key it
    belongs to (passed through ``lowercase``) is added and the synonym's
    tokens are removed.

    Args:
        frage: The question as a string, or an iterable whose items are
            concatenated without separator into one string.

    Returns:
        list: The distinct remaining tokens and entity keys in first-seen
        order.
    """
    frage = ''.join([str(elem) for elem in frage])
    sentence_word = nltk.word_tokenize(frage)

    ignore_words = {
        '?', '!', '-', '&', '.', ',', 'auf', 'können', 'kannst', 'mehr',
        'erzählen', 'erzählst', 'alten', 'frau', 'toll', 'beantragen',
        'brauche', 'brauchen', 'beantrage', 'liebst', 'lieben', 'mein',
        'meines', 'meins', 'meiner', 'meinem', 'meinen', 'wurde', 'möchte',
        'möchten', 'möchtest', 'mocht', 'wurden', 'würde', 'würden',
        'funktionen', 'gern', 'gerne', 'beantragt', 'benötige',
    }
    ignore_words.update(stopwords.words('german'))

    # Words that are kept even though they are stop words.
    keep_words = {
        'im', 'dich', 'am', 'um', 'was', 'war', 'zu', 'nach', 'kein', 'keine',
        'ohne', 'nicht', 'dein', 'wann', 'dort', 'dahin', 'tüv', 'wieder',
        'wie', 'viel', 'wo', 'warum', 'wieso', 'wohin', 'an', 'sich',
        'welche', 'welcher', 'welchen', 'unter', 'welches', 'dir', 'du',
        'viele', 'mich', 'jetzt', 'bei', 'darf', 'kann',
    }
    greetings = {
        'hallo', 'hey', 'hi', 'servus', 'mittag', 'abend', 'tag', 'morgen',
        'grüß', 'huhu', 'moin',
    }

    stemmed_frage = []
    for token in sentence_word:
        if token == "OB":
            stemmed_frage.append("oberburgermeist")
        word = token.lower()
        # spell.correction() can return None when it has no candidate (e.g.
        # for punctuation); fall back to the uncorrected token instead of
        # crashing in len()/stem() below.
        word = spell.correction(word) or word
        lenhallo = len(frage) - len(word)

        if word not in ignore_words or word in keep_words:
            word = stemmer.stem(word)
            stemmed_frage.append(word)
            if word in greetings:
                if lenhallo >= 7:
                    # The question is more than a bare greeting: drop it.
                    print(frage, lenhallo, len(frage), len(word))
                    print('ok!!!!')
                    stemmed_frage.remove(word)
                else:
                    stemmed_frage.append(word)

    # The result list IS the token list (same object): tokens consumed by one
    # entity are no longer available when matching the following entities.
    named_entities = stemmed_frage
    for key, synonyms in hilf_entiti.items():
        for synonym in synonyms:
            stemmed_entity = nltk.word_tokenize(synonym, language='german')
            if containsAll(stemmed_frage, stemmed_entity):
                print('stemmed_frage', stemmed_frage)
                print('stemmed_entity', stemmed_entity)
                named_entities.append(lowercase(key))
                for entity_token in stemmed_entity:
                    if entity_token in named_entities:
                        named_entities.remove(entity_token)

    # De-duplicate while keeping first-seen order so the result is stable
    # across interpreter runs (set iteration order is not).
    return list(dict.fromkeys(named_entities))
|
|
|
|
|
|
from fuzzywuzzy import process
|
|
|
|
|
|
|
|
|
|
def named_entity(stemmed_frage):
    """Return the service (Leistung) entity key matching the stemmed question.

    A key from ``leistung_entiti`` matches when all tokens of at least one of
    its synonyms occur in *stemmed_frage*. When several keys match, the
    generic ids '316', '160', '178', '417' and '1439' are only chosen if no
    other key matched.

    Args:
        stemmed_frage: Container of stemmed question tokens.

    Returns:
        str: '' when nothing matched; the chosen key when several keys
        matched; for exactly one match the list representation without
        brackets (i.e. the key still wrapped in quotes, as before).
        NOTE(review): the quoted single-match form is preserved from the
        original code; confirm that callers expect it.
    """
    matches = []
    for key, synonyms in leistung_entiti.items():
        for synonym in synonyms:
            stemmed_entity = nltk.word_tokenize(synonym, language='german')
            if containsAll(stemmed_frage, stemmed_entity) and key not in matches:
                matches.append(key)

    if len(matches) > 1:
        low_priority = {'316', '160', '178', '417', '1439'}
        # Filter into a new list: removing items from the list that is being
        # iterated skips elements and let generic ids survive.
        preferred = [key for key in matches if key not in low_priority]
        chosen = (preferred or matches)[0]
        return str(chosen).strip('[]')

    return str(matches).strip('[]')
|
|
|
|
def pruef_sbw(stemmed_frage):
    """Replace tokens that belong to a known service phrase by 'sbw_preamble'.

    For every synonym in ``leistung_entiti`` whose tokens all occur in
    *stemmed_frage*, those tokens are removed from the distinct question
    tokens and the marker 'sbw_preamble' is added instead.

    Args:
        stemmed_frage: Container of stemmed question tokens.

    Returns:
        str: The distinct remaining tokens joined by single spaces
        (order follows set iteration).
    """
    remaining = list(set(stemmed_frage))

    for synonyms in leistung_entiti.values():
        for synonym in synonyms:
            entity_tokens = nltk.word_tokenize(synonym, language='german')
            if containsAll(stemmed_frage, entity_tokens) == 0:
                continue
            for entity_token in entity_tokens:
                if entity_token in stemmed_frage and entity_token in remaining:
                    remaining.remove(entity_token)
                    remaining.append('sbw_preamble')
                    remaining = list(set(remaining))

    result = str(" ".join(list(set(remaining))))
    print('pruef_sbw named_entities', result)
    return result
|
|
|
|
def pruef_kontext(frage):
    """Strip known life-situation phrases from a question.

    The question is tokenised and stop words are dropped unless they are on
    the keep list. For every synonym in ``lebenslage_entiti`` whose tokens
    all occur in the remaining tokens, each of those tokens is replaced by
    the placeholder 'ist'.

    Args:
        frage: The user question as a string.

    Returns:
        str: The distinct remaining tokens joined by ', '
        (order follows set iteration).
    """
    tokens = nltk.word_tokenize(frage)

    ignored = {
        '?', '!', '.', ',', 'möchte', 'möchtest', 'möchten', 'brauche',
        'brauch', 'frage', 'zu', 'beantragen', 'beantrag', 'beantrage',
        'benötige',
    }.union(stopwords.words('german'))
    # Stop words that are kept nevertheless.
    always_kept = {
        'was', 'um', 'war', 'zu', 'nach', 'kein', 'keine', 'ohne', 'nicht',
        'dein', 'wann', 'dort', 'dahin', 'tüv', 'wieder', 'wie', 'viel', 'wo',
        'warum', 'wieso', 'wohin', 'an', 'sich', 'welche', 'welcher',
        'welchen', 'unter', 'welches', 'dir', 'du', 'viele', 'mich', 'jetzt',
        'bei', 'darf', 'kann',
    }

    # One list serves both as match target and as result, so replacements
    # made for one phrase affect the matching of the following phrases.
    remaining = [
        token for token in tokens
        if token in always_kept or token not in ignored
    ]

    for phrases in lebenslage_entiti.values():
        for phrase in phrases:
            phrase_tokens = nltk.word_tokenize(phrase, language='german')
            if containsAll(remaining, phrase_tokens) == 0:
                continue
            for phrase_token in phrase_tokens:
                print('b', phrase_token)
                if phrase_token in remaining:
                    remaining.remove(phrase_token)
                    remaining.append('ist')

    result = str(", ".join(list(set(remaining))))
    print(result)
    return result
|
|
|
|
from HanTa import HanoverTagger as ht
|
|
|
|
# Lazily created HanoverTagger instance shared by all pos() calls.
_POS_TAGGER = None


def pos(frage):
    """Mark which tokens of a question are tagged 'NN'.

    Args:
        frage: The user question as a string.

    Returns:
        list: One entry per tagger result: 'NN' when the result contains
        the element 'NN', otherwise ''.
    """
    global _POS_TAGGER

    tokenized_sent = nltk.word_tokenize(frage, language='german')
    print(tokenized_sent)

    # Load the morphology model only once; re-reading it from disk on every
    # call is needlessly slow.
    if _POS_TAGGER is None:
        _POS_TAGGER = ht.HanoverTagger('morphmodel_ger.pgz')

    tags = _POS_TAGGER.tag_sent(tokenized_sent)
    print(tags)

    # Presumably each item is a (word, lemma, tag) tuple; 'NN' in item is a
    # membership test on that tuple. TODO confirm against HanTa.
    return ['NN' if 'NN' in item else '' for item in tags]
|
|
|
|
|
|
def queryfrage(frage):
    """Build the request for the search service from a user question.

    The question is tokenised and lower-cased; stop words and filler words
    are dropped unless they are on the keep list. A greeting word is dropped
    when the question is at least 7 characters longer than the greeting.

    Args:
        frage: The question as a string, or an iterable whose items are
            concatenated without separator into one string.

    Returns:
        dict: ``prefix`` (answer intro), ``service`` ('search'),
        ``original`` (the unmodified argument) and ``query`` (the kept
        words joined by single spaces).
    """
    request = {
        'prefix': 'Ich habe folgenden Informationen für Sie gefunden: <br>',
        'service': 'search',
        'original': frage,
    }

    # The original also ran hilf_entity() and a first filter pass here, but
    # the result was never used; that dead work has been removed.
    frage = ''.join([str(elem) for elem in frage])
    print('frage!!!!', frage)

    ignore_words = {
        '?', '!', '-', '.', ',', 'mocht', 'frage', 'zu', 'beantragen',
        'beantrag', 'gibt', 'beantrage', 'melden', 'benötige', 'brauche',
    }
    ignore_words.update(stopwords.words('german'))
    # Stop words that are kept nevertheless.
    keep_words = {
        'was', 'um', 'war', 'zu', 'nach', 'kein', 'keine', 'ohne', 'nicht',
        'dein', 'wann', 'dort', 'dahin', 'tüv', 'wieder', 'wie', 'viel', 'wo',
        'warum', 'wieso', 'wohin', 'an', 'sich', 'welche', 'welcher',
        'welchen', 'unter', 'welches', 'dir', 'du', 'viele', 'jetzt', 'mich',
        'bei', 'darf', 'kann',
    }
    greetings = {
        'hallo', 'hey', 'hi', 'servus', 'mittag', 'abend', 'tag', 'morgen',
        'grüß', 'huhu', 'moin',
    }

    query_words = []
    for token in nltk.word_tokenize(frage):
        word = token.lower()

        # NOTE(review): 'zu' ends up in the query twice (here and through
        # keep_words below). Preserved from the original; confirm intent.
        if word == 'zu':
            query_words.append("zu")

        if word not in ignore_words or word in keep_words:
            # The length guard applies to EVERY greeting. In the original,
            # `a or b or ... or x and guard` bound the guard to 'moin' only,
            # so a bare "hallo" produced an empty query.
            if word in greetings and len(frage) - len(word) >= 7:
                continue
            query_words.append(word)

    query = ", ".join(query_words)
    query = query.replace(',', '')
    request['query'] = query
    return request
|
|
|
|
|
|
def frageBearbeitung(frage):
    """Turn a user question into the list of words used for classification.

    The question is first normalised by ``hilf_entity``; the result is
    re-tokenised, lower-cased and filtered: stop words and filler words are
    dropped unless they are on the keep list.

    Args:
        frage: The user question, in any form ``hilf_entity`` accepts.

    Returns:
        list: The kept words, lower-cased. 'zu' appears twice for every
        occurrence.
    """
    normalised = str(", ".join(hilf_entity(frage))).replace(',', '')
    tokens = nltk.word_tokenize(normalised)

    ignored = {
        '?', '!', '-', '&', '.', ',', 'auf', 'können', 'kannst', 'mehr',
        'erzählen', 'erzählst', 'alten', 'frau', 'toll', 'haus', 'beantragen',
        'brauche', 'brauchen', 'beantrage', 'liebst', 'lieben', 'mein',
        'meines', 'meins', 'meiner', 'meinem', 'meinen', 'wurde', 'möchte',
        'möchten', 'mocht', 'wurden', 'würde', 'würden', 'funktionen',
        'benötige',
    }.union(stopwords.words('german'))
    # Stop words that are kept nevertheless.
    always_kept = {
        'was', 'war', 'um', 'zu', 'nach', 'kein', 'keine', 'ohne', 'nicht',
        'dein', 'wann', 'dort', 'dahin', 'tüv', 'wieder', 'wie', 'viel', 'wo',
        'warum', 'wieso', 'wohin', 'an', 'sich', 'welche', 'welcher',
        'welchen', 'unter', 'welches', 'dir', 'du', 'viele', 'jetzt', 'mich',
        'bei', 'darf', 'kann',
    }

    kept = []
    for token in tokens:
        lowered = token.lower()
        if lowered == 'zu' and lowered in ignored:
            kept.append("zu")
        if lowered in always_kept or lowered not in ignored:
            kept.append(lowered)

    sentence_words = list(map(str.lower, kept))
    print('sentence_words_frage', sentence_words)
    return sentence_words
|
|
|
|
|
|
import dateparser
|
|
|
|
# Module-level snapshot of today's date, evaluated once when the module is
# imported.
# NOTE(review): the date helpers visible in this file re-read date.today()
# into a local of the same name; confirm whether this global is still used.
right_now = date.today()
|
|
|
|
|
|
_OEZ_DATE_FORMAT = '%d.%m.%Y'

# Substring replacements applied to relative expressions before parsing.
# The inflected forms of 'ein' come first: replacing 'ein' first would turn
# 'eine' into '1e' and make the longer variants unreachable.
_OEZ_NUMBER_WORDS = (
    ('einen', '1'), ('einem', '1'), ('einer', '1'), ('eine', '1'), ('ein', '1'),
    ('zwei', '2'), ('drei', '3'), ('vier', '4'), ('fünf', '5'),
    ('sechs', '6'), ('sieben', '7'), ('acht', '8'), ('neun', '9'),
    ('zehn', '10'),
    ('nach', 'in'),
)

# Index in this tuple == weekday number passed to next_weekday().
_OEZ_WEEKDAYS = ('montag', 'dienstag', 'mittwoch', 'donnerstag', 'freitag',
                 'samstag', 'sonntag')


def _oez_fmt(day):
    """Format a date/datetime as DD.MM.YYYY."""
    return day.strftime(_OEZ_DATE_FORMAT)


def _oez_parse(expression, settings=None):
    """Parse *expression* with dateparser; return DD.MM.YYYY or None."""
    parsed = dateparser.parse(expression, settings=settings)
    return None if parsed is None else _oez_fmt(parsed)


def _oez_weekday_range(timex, today):
    """Return (day, day) for the next occurrence of a weekday named in *timex*, else None."""
    found = None
    for index, name in enumerate(_OEZ_WEEKDAYS):
        # Only the all-lowercase and the capitalised spelling are recognised.
        if name in timex or name.capitalize() in timex:
            day = _oez_fmt(next_weekday(today, index))
            found = (day, day)
    return found


def _oez_weekend_range(today):
    """Return (start, end) of the current or upcoming weekend."""
    weekno = today.weekday()
    if weekno < 5:
        return _oez_fmt(next_weekday(today, 5)), _oez_fmt(next_weekday(today, 6))
    if weekno == 5:
        return _oez_fmt(today), _oez_fmt(next_weekday(today, 6))
    return _oez_fmt(today), _oez_fmt(today)


def _oez_month_range(expression, from_first_day):
    """Return (start, last day of month) for the month dateparser resolves *expression* to."""
    import calendar

    anchor = dateparser.parse(expression)
    if anchor is None:
        return None
    # Use the real month length instead of a hard-coded '30.', which yields
    # an invalid date in February and cuts off months with 31 days.
    last_day = calendar.monthrange(anchor.year, anchor.month)[1]
    start = anchor.replace(day=1) if from_first_day else anchor
    return _oez_fmt(start), _oez_fmt(anchor.replace(day=last_day))


def _oez_relative_range(timex, today):
    """Resolve 'nächste …' / 'diese …' / 'letzte woche' to (start, end), else None."""
    result = None

    if 'nächste' in timex:
        if 'woche' in timex:
            monday = next_weekday(today, 0)
            result = (_oez_fmt(monday), _oez_fmt(monday + timedelta(days=6)))
        if 'tag' in timex:
            # Also true for every weekday name; those override it below.
            result = (_oez_fmt(today), _oez_fmt(today + timedelta(days=7)))
        if 'monat' in timex:
            result = _oez_month_range('nächsten Monat', True) or result
        result = _oez_weekday_range(timex, today) or result

    if 'diese' in timex:
        if 'woche' in timex:
            result = (_oez_fmt(today), _oez_fmt(next_weekday(today, 6)))
        if 'monat' in timex:
            result = _oez_month_range('diesen Monat', False) or result
        result = _oez_weekday_range(timex, today) or result

    if 'letzte' in timex and 'woche' in timex:
        monday = past_weekday(today, 0)
        result = (_oez_fmt(monday), _oez_fmt(monday + timedelta(days=6)))

    return result


def extractDateOeffnungszeit(text):
    """Extract the date range(s) an opening-hours question refers to.

    The module-level patterns reg1 … reg10 are applied in a fixed order;
    each hit contributes a (start, end) pair of DD.MM.YYYY strings.
    Expressions dateparser cannot parse are skipped.

    Args:
        text: The user question as a string.

    Returns:
        list: Flat list ``[start, end, start, end, …]`` of date strings;
        empty when no temporal expression was recognised.
    """
    timex_found = []
    today = date.today()
    today_str = _oez_fmt(today)

    # reg1: relative offsets ("in drei Tagen") -> today .. target day.
    for timex in [a[0] for a in reg1.findall(text) if len(a) > 1]:
        for word, replacement in _OEZ_NUMBER_WORDS:
            timex = timex.replace(word, replacement)
        target = _oez_parse('in' + timex)
        if target is not None:
            timex_found.extend((today_str, target))

    # reg8: month / ISO variations -> single day.
    for timex in [a[0] for a in reg8.findall(text) if len(a) > 1]:
        day = _oez_parse(timex)
        if day is not None:
            timex_found.extend((day, day))

    # reg2: "nächste/diese/letzte …". The range is computed per hit, so an
    # unresolved hit falls back to today rather than to values left over
    # from an earlier hit.
    for timex in [a[0] for a in reg2.findall(text) if len(a) > 1]:
        start, end = _oez_relative_range(timex, today) or (today_str, today_str)
        timex_found.extend((start, end))

    # reg5 / reg9: weekend expressions replace everything found so far.
    for pattern in (reg5, reg9):
        if pattern.findall(text):
            timex_found.clear()
            timex_found.extend(_oez_weekend_range(today))

    # reg3: today, tomorrow, etc.
    for timex in reg3.findall(text):
        if 'übermorgen' in timex:
            timex_found.extend((today_str, _oez_fmt(today + timedelta(days=2))))
        else:
            day = _oez_parse(timex)
            if day is not None:
                timex_found.extend((day, day))

    # reg4: ISO dates.
    for timex in reg4.findall(text):
        day = _oez_parse(timex)
        if day is not None:
            timex_found.extend((day, day))

    # reg6: written dates.
    for timex in [a[0] for a in reg6.findall(text) if len(a) > 1]:
        day = _oez_parse(timex)
        if day is not None:
            timex_found.extend((day, day))

    # reg7: bare weekday names, only when nothing else matched.
    for timex in reg7.findall(text):
        if not timex_found:
            weekday_range = _oez_weekday_range(timex, today)
            if weekday_range is not None:
                timex_found.extend(weekday_range)

    # reg10: month expressions, only when nothing else matched -> first and
    # last day as resolved by dateparser.
    for timex in reg10.findall(text):
        if not timex_found:
            last = _oez_parse(timex, settings={'PREFER_DAY_OF_MONTH': 'last'})
            first = _oez_parse(timex, settings={'PREFER_DAY_OF_MONTH': 'first'})
            if first is not None and last is not None:
                timex_found.extend((first, last))

    return timex_found
|
|
|
|
|
|
# Words that are kept for entity matching even when they appear in the
# German stop-word list.
_NE_KEEP_WORDS = frozenset([
    'im', 'am', 'um', 'was', 'war', 'zu', 'nach', 'kein', 'keine', 'ohne',
    'nicht', 'dein', 'wann', 'dort', 'dahin', 'tüv', 'wieder', 'wie', 'viel',
    'wo', 'warum', 'wieso', 'wohin', 'an', 'sich', 'welche', 'welcher',
    'welchen', 'unter', 'welches', 'dir', 'du', 'viele', 'mich', 'jetzt',
    'bei', 'darf', 'kann',
])

# (stem expected in the question, substring expected in the entity key).
# The order matters: later districts are checked against the result of
# earlier ones.
_NE_DISTRICTS = (
    ('ziegelhaus', 'Ziegelhausen'),
    ('schlierbach', 'Schlierbach'),
    ('wiebling', 'Wieblingen'),
    ('pfaffengrund', 'Pfaffengrund'),
    ('kirchheim', 'Kirchheim'),
    ('boxberg', 'Boxberg'),
    ('emmertsgrund', 'Emmertsgrund'),
    ('rohrbach', 'Rohrbach'),
    ('neuenheim', 'Neuenheim'),
    ('mitt', 'Mitte'),
    ('bahnstadt', 'Bahnstadt'),
    ('bergheim', 'Bergheim'),
    ('sudstadt', 'Südstadt'),
    ('weststadt', 'Weststadt'),
    ('handschuhsheim', 'Handschuhsheim'),
    ('altstadt', 'Altstadt'),
)


def _ne_stem_frage(frage):
    """Tokenize, spell-correct, filter and stem a question.

    Returns the list of stemmed tokens used for entity matching.
    """
    ignore_words = {'?', '!', '.', ','}
    ignore_words.update(stopwords.words('german'))

    stemmed_frage = []
    for token in nltk.word_tokenize(str(frage)):
        # The literal token "OB" additionally contributes a fixed stem; the
        # token itself still goes through the normal pipeline below.
        if token == "OB":
            stemmed_frage.append("oberburgermeist")

        word = token.lower()
        # Newer pyspellchecker releases return None when there is no
        # correction candidate; fall back to the uncorrected word instead of
        # passing None on to the stemmer.
        corrected = spell.correction(word)
        if corrected is not None:
            word = corrected

        if word not in ignore_words or word in _NE_KEEP_WORDS:
            stemmed_frage.append(stemmer.stem(word))
    return stemmed_frage


def _ne_match_orgs(stemmed_frage):
    """Return every key of ``org_entiti`` with an alias matched by ``containsAll``.

    A key is appended once per matching alias, so duplicates are possible.
    """
    matches = []
    for org_key, aliases in org_entiti.items():
        for alias in aliases:
            alias_tokens = nltk.word_tokenize(alias, language='german')
            if containsAll(stemmed_frage, alias_tokens) != 0:
                matches.append(org_key)
    return matches


def _ne_narrow_to_district(stemmed_frage, named_entities):
    """Reduce the matches to a single district entity when the question names one.

    For each district whose stem occurs in the question, the list is replaced
    by the last matched entity whose key contains the district name. If no
    matched entity contains that name, the list is left unchanged.
    """
    for stem, label in _NE_DISTRICTS:
        if stem not in stemmed_frage:
            continue
        hits = [org for org in named_entities if label in org]
        if hits:
            named_entities = [hits[-1]]
    return named_entities


def named_entity_oefnungszeit(frage):
    """Extract the entities needed to answer an opening-hours question.

    Args:
        frage: The user's question (converted with ``str()`` before use).

    Returns:
        A dict with the keys ``'service'`` (always ``'oeffnungszeit'``),
        ``'org'`` (de-duplicated list of matched entity keys, or ``['']`` if
        nothing matched), ``'zeit'`` (the result of
        ``extractDateOeffnungszeit(frage)``) and ``'termin'`` (always ``""``).
        When nothing matched, ``'prefix'`` additionally holds an HTML hint
        with a link to the general opening hours.
    """
    stemmed_frage = _ne_stem_frage(frage)
    named_entities = _ne_narrow_to_district(
        stemmed_frage, _ne_match_orgs(stemmed_frage))

    result = {'service': 'oeffnungszeit'}
    if not named_entities:
        named_entities.append('')
        result['prefix'] = 'Vielleicht ist hier was Passendes dabei: <br> <a href="https://www.freiburg.de/pb/205348.html" target="_blank" role="button">Öffnungszeiten der Stadtverwaltung</a><br>'

    result['org'] = list(set(named_entities))
    result['zeit'] = extractDateOeffnungszeit(frage)
    result['termin'] = ""
    return result


def named_entity_infoort(frage):
    """Extract the entities needed to answer a location/contact question.

    Args:
        frage: The user's question (converted with ``str()`` before use).

    Returns:
        A dict with the keys ``'service'`` (always ``'infoort'``), ``'org'``
        (de-duplicated list of matched entity keys, or ``['Bürgerservice']``
        if nothing matched) and ``'termin'`` (always ``""``).
    """
    stemmed_frage = _ne_stem_frage(frage)
    named_entities = _ne_narrow_to_district(
        stemmed_frage, _ne_match_orgs(stemmed_frage))

    if not named_entities:
        named_entities.append('Bürgerservice')

    result = {'service': 'infoort'}
    result['org'] = list(set(named_entities))
    result['termin'] = ""
    return result
|
|
|
|
|
|
|
|
# Returns a bag-of-words array: 1 for every vocabulary word that occurs in the sentence, otherwise 0
|
|
def bow(frage, words, show_details=False):
    """Encode a question as a 0/1 vector over the vocabulary ``words``.

    Args:
        frage: The user's question; it is preprocessed by
            ``frageBearbeitung`` into a list of tokens.
        words: The vocabulary; position ``i`` of the result corresponds to
            ``words[i]``.
        show_details: When true, print each vocabulary word that was found.

    Returns:
        A NumPy array of length ``len(words)`` holding 1 where the vocabulary
        word equals one of the question's tokens and 0 elsewhere.
    """
    tokens = frageBearbeitung(frage)
    vector = [0] * len(words)

    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            vector[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)

    return np.array(vector)
|
|
|
|
# Predictions with a probability at or below this value are discarded by the
# classifier.
ERROR_THRESHOLD = 0.01

# Building blocks of the answer processor:
# data structure that holds the per-user conversation context.
context = {}

# Load our saved model.
model.load('prodae/models/model.tflearn')
|
|
|
|
|
|
def klassifizieren(frage):
    """Classify a question and return the candidate intents, best first.

    Args:
        frage: The user's question; it is encoded with ``bow`` against the
            module-level vocabulary ``words``.

    Returns:
        A list of ``(class_name, probability)`` tuples for every class whose
        predicted probability is greater than ``ERROR_THRESHOLD``, sorted by
        descending probability.
    """
    # Let the model produce one probability per class.
    probabilities = model.predict([bow(frage, words)])[0]

    # Drop predictions below the threshold and rank the rest by probability.
    ranked = sorted(
        (
            (class_index, probability)
            for class_index, probability in enumerate(probabilities)
            if probability > ERROR_THRESHOLD
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    return [(classes[class_index], probability) for class_index, probability in ranked]
|
|
|
|
|
|
def antwort(frage, userId, show_details=False):
|
|
print('Context#####', context)
|
|
|
|
d_itent = []
|
|
|
|
for itent in dialogflow['antwort_leistungen']:
|
|
try:
|
|
d_itent.append([itent['intent']])
|
|
except KeyError:
|
|
d_itent.append([itent['intent']])
|
|
|
|
d_itent = [item for sublist in d_itent for item in sublist]
|
|
|
|
|
|
if frage in d_itent:
|
|
#print('12')
|
|
next_intent = frage
|
|
results = [(next_intent, 1.0)]
|
|
#####print(results)
|
|
#####print(results[0][0])
|
|
while results:
|
|
for i in dialogflow['antwort_leistungen']:
|
|
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results[0][0]:
|
|
a = i['intent']
|
|
|
|
b = re.sub('sbw_.*?_', '', a, flags=re.DOTALL)
|
|
|
|
intent_context1 = i['intent']
|
|
context[userId] = intent_context1
|
|
ant = i['antwort']
|
|
print('ant1!!!', ant)
|
|
if 'sbw_preamble' in i['intent']:
|
|
text = str(i['antwort'][0])
|
|
number_of_sentences = sent_tokenize(text)
|
|
print('len(number_of_sentences)', len(number_of_sentences))
|
|
|
|
if len(number_of_sentences) > 2:
|
|
sum = sum_kata(text)
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + sum + '<details><summary><strong><em>Genauere Informationen finden Sie hier!</em></strong></summary>' + text + '</details><br>'
|
|
voice = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + sum + '<p>Weitere Informationen finden Sie im Textfenster</p>'
|
|
else:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + \
|
|
i['antwort'][0]
|
|
|
|
voice = ant + '<p>Weitere Informationen finden Sie im Textfenster</p>'
|
|
ant1 = ant + (
|
|
'<!-- Chatbot Card # WEITERE INFORMATIONEN --><div class="chatbot-card__details chatbot-card__details--sbwLeistung"><!-- Chatbot Card # TITEL DETAILS --><div class="chatbot-card__detailsTitle"><p>Welche zusätzlichen Informationen möchten Sie ansehen?</p></div><!-- Chatbot Card # SELECTION --><div class="chatbot-card__selection"> <ul class="selection__items"> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_kosten_%s\')">Kosten</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_organisationseinheiten_%s\')">Zuständige Stelle</button></li> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_oeffnungszeiten_%s\')">Öffnungszeiten</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_prozesse_%s\')">Online Anträge</button></li> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_formulare_%s\')">Online Formulare</button></li> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_unterlagen_%s\')">Erforderliche Unterlagen</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_fristen_%s\')">Fristen</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_bearbeitungsdauer_%s\')">Bearbeitungsdauer</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_voraussetzungen_%s\')">Voraussetzungen</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_verfahrensablauf_%s\')">Verfahrensablauf</button></li> </ul></div> </div>' % (
|
|
b, b, b, b, b, b, b, b, b, b))
|
|
|
|
return [ant1, voice]
|
|
|
|
|
|
if not ant:
|
|
if 'sbw_kosten' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
|
|
if 'sbw_bearbeitungsdauer' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
if 'sbw_fristen' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
else:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Keine Angabe</p>'
|
|
return ant
|
|
if i['antwort'][0] == 'keine Angaben':
|
|
print('hier1!!!')
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Es gibt keine Online Anträge.</p>'
|
|
return ant
|
|
if i['antwort'][0] == '<p>keine</p>':
|
|
print('hier1!!!')
|
|
if 'sbw_kosten' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Diese Leistung ist kostenfrei. </p>'
|
|
return ant
|
|
|
|
if 'sbw_bearbeitungsdauer' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Die Bearbeitung erfolgt sofort. </p>'
|
|
return ant
|
|
|
|
if 'sbw_fristen' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Sie müssen keine Fristen beachten. </p>'
|
|
return ant
|
|
|
|
else:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>keine</p>'
|
|
return ant
|
|
else:
|
|
print('hier2!!!')
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + i['antwort'][0]
|
|
return ant
|
|
|
|
#####print('1')
|
|
|
|
results.pop(0)
|
|
|
|
else:
|
|
#print('klassifi')
|
|
stemmed_frage = hilf_entity(frage)
|
|
if len(named_entity(stemmed_frage)) != 0:
|
|
frage=pruef_sbw(stemmed_frage)
|
|
print('fragebearb', frage)
|
|
results = klassifizieren(frage)
|
|
print('klassifiresults', results)
|
|
#print('fragebearb')
|
|
#print('stemmed_frage=hilf_entity(frage)', stemmed_frage)
|
|
#####print('13')
|
|
###print(results)
|
|
accurancy = results[0][1]
|
|
frage_test = "bgfdtgh"
|
|
#wert_accurancy = klassifizieren(frage_test)[0][1]
|
|
wert_accurancy = 0.30
|
|
print('wert_accurancy', wert_accurancy)
|
|
print('accurancy', accurancy)
|
|
#kontextliste = load_kontextliste(userId)
|
|
# #####print(results[0][0])
|
|
|
|
while results:
|
|
for i in dialogflow['dialogflow']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results[0][0]:
|
|
# Setze bei Bedarf Kontext für diese dialog
|
|
# Überprüfe, ob diese Inetnt kontextbezogen ist
|
|
|
|
|
|
if i['intent'] == 'lebenslage_fuehrerschein':
|
|
ant = i['antwort']
|
|
voice='<p>Hier erfahren Sie</p><ul> <li>wie Sie einen Führerschein erhalten können,</li> <li>welche Nachweise Sie benötigen und</li> <li>was Sie in diesem Zusammenhang noch alles wissen sollten.</li> </ul><p>Wählen Sie bitte die für Sie richtige Leistung</p>'
|
|
return [ant, voice]
|
|
|
|
|
|
if i['intent'] == 'sbw_preamble':
|
|
##print('frage', frage)
|
|
entity_sbw = named_entity(stemmed_frage)
|
|
if len(entity_sbw) > 0:
|
|
#####print('entity sbw!')
|
|
context[userId] = {}
|
|
context[userId] = 'sbw_preamble'
|
|
entity_sbw = entity_sbw.strip("''")
|
|
next_intent = context[userId] + '_' + entity_sbw
|
|
###print('next_intent_label', next_intent)
|
|
results = [(next_intent, 1.0)]
|
|
#####print(results)
|
|
#####print(results[0][0])
|
|
context[userId] = next_intent
|
|
#####print('context1:', context)
|
|
while results:
|
|
for i in dialogflow['antwort_leistungen']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results[0][0]:
|
|
a = i['intent']
|
|
b = re.sub('sbw_.*?_', '', a, flags=re.DOTALL)
|
|
ant = i['antwort']
|
|
text = str(i['antwort'][0])
|
|
number_of_sentences = sent_tokenize(text)
|
|
#print('len(number_of_sentences)', len(number_of_sentences))
|
|
|
|
if not ant:
|
|
ant= '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>'+ '<p>Keine Angabe</p>'
|
|
|
|
|
|
else:
|
|
|
|
text = str(i['antwort'][0])
|
|
|
|
number_of_sentences = sent_tokenize(text)
|
|
|
|
# print('len(number_of_sentences)', len(number_of_sentences))
|
|
|
|
if len(number_of_sentences) > 2:
|
|
|
|
sum = sum_kata(text)
|
|
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + sum + '<details><summary><strong><em>Genauere Informationen finden Sie hier!</em></strong></summary>' + text + '</details><br>'
|
|
|
|
voice = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + sum + '<p>Weitere Informationen finden Sie im Textfenster</p>'
|
|
|
|
|
|
else:
|
|
|
|
if i['antwort'][0] == 'keine Angaben':
|
|
|
|
print('hier1!!!')
|
|
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Es gibt keine Online Anträge.</p>'
|
|
|
|
else:
|
|
|
|
print('hier2!!!')
|
|
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + \
|
|
\
|
|
i['antwort'][0]
|
|
|
|
voice = ant + '<p>Weitere Informationen finden Sie im Textfenster</p>'
|
|
|
|
ant1 = ant + (
|
|
'<!-- Chatbot Card # WEITERE INFORMATIONEN --><div class="chatbot-card__details chatbot-card__details--sbwLeistung"><!-- Chatbot Card # TITEL DETAILS --><div class="chatbot-card__detailsTitle"><p>Welche zusätzlichen Informationen möchten Sie ansehen?</p></div><!-- Chatbot Card # SELECTION --><div class="chatbot-card__selection"> <ul class="selection__items"> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_kosten_%s\')">Kosten</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_organisationseinheiten_%s\')">Zuständige Stelle</button></li> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_oeffnungszeiten_%s\')">Öffnungszeiten</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_prozesse_%s\')">Online Anträge</button></li> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_formulare_%s\')">Online Formulare</button></li> <li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_unterlagen_%s\')">Erforderliche Unterlagen</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_fristen_%s\')">Fristen</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_bearbeitungsdauer_%s\')">Bearbeitungsdauer</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_voraussetzungen_%s\')">Voraussetzungen</button></li><li class="selection__item"><button class="btn-ortena" data-intent="sbw_preamble" onclick="myFunction(\'sbw_verfahrensablauf_%s\')">Verfahrensablauf</button></li> </ul></div> </div>' % (
|
|
b, b, b, b, b, b, b, b, b, b))
|
|
|
|
#####print('ant', ant)
|
|
#####print('ant1', ant1)
|
|
return [ant1, voice]
|
|
|
|
results.pop(0)
|
|
else:
|
|
#####print('keine sbw_entität!!!')
|
|
|
|
#####print('keine sbw_entität und kein kontext!!!!')
|
|
#####print('accurancy', accurancy)
|
|
#####print('wert_accurancy', wert_accurancy)
|
|
#stemmed_frage= frageBearbeitung(frage)
|
|
#####print('frage!!!', stemmed_frage)
|
|
|
|
if 'fuhrerschein' in stemmed_frage:
|
|
# print('len(stemmed_frage)', len(stemmed_frage))
|
|
|
|
results = 'lebenslage_fuehrerschein'
|
|
print('resultsstemmed_frage_intent', results)
|
|
for i in dialogflow['dialogflow']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results:
|
|
print('stemmed_frage_intent')
|
|
ant = i['antwort']
|
|
voice = '<p>Hier erfahren Sie</p><ul> <li>wie Sie einen Führerschein erhalten können,</li> <li>welche Nachweise Sie benötigen und</li> <li>was Sie in diesem Zusammenhang noch alles wissen sollten.</li> </ul><p>Wählen Sie bitte die für Sie richtige Leistung</p>'
|
|
|
|
return [ant, voice]
|
|
|
|
else:
|
|
return ('<p>Ich bin ein Prototyp, der Ihnen verschiedene Fragen zum Thema Führerschein beantworten kann.</p><p>Um Ihnen weiterhelfen zu können, benötige ich genauere Informationen. <br> Können Sie bitte Ihre Frage anders formulieren?</p>')
|
|
|
|
|
|
if 'sbw_' in i['intent']:
|
|
#print('sbw_ in i[intent]')
|
|
entity_sbw = named_entity(stemmed_frage)
|
|
if len(entity_sbw) > 0:
|
|
#####print('entity sbw!')
|
|
|
|
context[userId] = {}
|
|
context[userId] = i['intent']
|
|
##print('context[userId]', context[userId])
|
|
entity_sbw = entity_sbw.strip("''")
|
|
next_intent = context[userId] + '_' + entity_sbw
|
|
##print('next_intent_label', next_intent)
|
|
results = [(next_intent, 1.0)]
|
|
#####print(results)
|
|
#####print(results[0][0])
|
|
context[userId] = next_intent
|
|
#####print('context1:', context)
|
|
while results:
|
|
for i in dialogflow['antwort_leistungen']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results[0][0]:
|
|
ant = i['antwort']
|
|
|
|
|
|
if not ant:
|
|
if 'sbw_kosten' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
|
|
if 'sbw_bearbeitungsdauer' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
if 'sbw_fristen' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
else:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
else:
|
|
if i['antwort'][0] == 'keine Angaben':
|
|
print('hier1!!!')
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Es gibt keine Online Anträge.</p>'
|
|
return ant
|
|
if i['antwort'][0] == '<p>keine</p>':
|
|
print('hier1!!!')
|
|
if 'sbw_kosten' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Diese Leistung ist kostenfrei. </p>'
|
|
return ant
|
|
|
|
if 'sbw_bearbeitungsdauer' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Die Bearbeitung erfolgt sofort. </p>'
|
|
return ant
|
|
|
|
if 'sbw_fristen' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Sie müssen keine Fristen beachten. </p>'
|
|
return ant
|
|
|
|
else:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>keine</p>'
|
|
return ant
|
|
else:
|
|
print('hier2!!!')
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + i['antwort'][
|
|
0]
|
|
return ant
|
|
return ant
|
|
|
|
results.pop(0)
|
|
if len(entity_sbw) == 0:
|
|
print('keine sbw_entitäterfedferdf!!!')
|
|
#print('userId', context[userId])
|
|
stemmed_frage = frageBearbeitung(frage)
|
|
if userId in context and 'sbw_' in context[userId]:
|
|
print('sbw_ in context keine sbw_entitäterfedferdf!!!')
|
|
frage_neu = pruef_kontext(frage)
|
|
print('frage_neu', frage_neu)
|
|
post = pos(frage_neu)
|
|
print('ueberpruefung_kontext', pos)
|
|
|
|
###print('frage!', stemmed_frage)
|
|
|
|
if 'fuhrerschein' in stemmed_frage:
|
|
# print('len(stemmed_frage)', len(stemmed_frage))
|
|
|
|
results = 'lebenslage_fuehrerschein'
|
|
print('resultsstemmed_frage_intent', results)
|
|
for i in dialogflow['dialogflow']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results:
|
|
print('stemmed_frage_intent')
|
|
ant = i['antwort']
|
|
voice = '<p>Hier erfahren Sie</p><ul> <li>wie Sie einen Führerschein erhalten können,</li> <li>welche Nachweise Sie benötigen und</li> <li>was Sie in diesem Zusammenhang noch alles wissen sollten.</li> </ul><p>Wählen Sie bitte die für Sie richtige Leistung</p>'
|
|
|
|
return [ant, voice]
|
|
|
|
if 'NN' in post:
|
|
if i['intent'] == 'sbw_oeffnungszeiten' and accurancy >0.28:
|
|
ant = "<div class=\"chatbot-card chatbot-card--org\">\n<div class=\"card-header chatbot-card__header\">\n<div class=\"chatbot-card__title\">\n<div class=\"orgTitle\">Öffnungszeiten Kfz-Bürgerbüro / Führerschein</div>\n</div>\n</div>\n<div class=\"card-body chatbot-card__body\">\n<div>\n<div class=\"sp-oe-oeffnungszeit-typ\">Allgemeine Sprechzeit</div>\n<div class=\"sp-oe-oeffnungszeit-zeiten\">\n<div><span>Montag <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Dienstag <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Mittwoch <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Donnerstag <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Freitag <span>8 - 12 Uhr</span></span></div>\n<div><span><span></span></span></div>\n</div>\n</div>\n</div>\n</div>"
|
|
|
|
return (ant)
|
|
else:
|
|
return ('<p>Ich bin ein Prototyp, der Ihnen verschiedene Fragen zum Thema Führerschein beantworten kann.</p><p>Um Ihnen weiterhelfen zu können, benötige ich genauere Informationen. <br> Können Sie bitte Ihre Frage anders formulieren?</p>')
|
|
|
|
else:
|
|
a = context[userId]
|
|
label = re.sub('sbw_.*?_', '', a, flags=re.DOTALL)
|
|
next_intent = i['intent'] + '_' + label
|
|
print('next_intent_label', next_intent)
|
|
results = [(next_intent, 1.0)]
|
|
print(results)
|
|
print(results[0][0])
|
|
context[userId] = next_intent
|
|
print('context1:', context)
|
|
while results:
|
|
for i in dialogflow['antwort_leistungen']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results[0][0]:
|
|
# Setze bei Bedarf Kontext für diese dialog
|
|
ant = i['antwort']
|
|
print('ant', ant)
|
|
|
|
|
|
if not ant:
|
|
if 'sbw_kosten' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
|
|
if 'sbw_bearbeitungsdauer' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
if 'sbw_fristen' in i['intent']:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
else:
|
|
ant = '<p>Leider kann ich Ihnen dazu keine Informationen liefern. Bitte wenden Sie sich ans Kfz-Bürgerbüro / Führerschein</p>'
|
|
return ant
|
|
else:
|
|
if i['antwort'][0] == 'keine Angaben':
|
|
print('hier1!!!')
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Es gibt keine Online Anträge.</p>'
|
|
return ant
|
|
if i['antwort'][0] == '<p>keine</p>':
|
|
print('hier1!!!')
|
|
if 'sbw_kosten' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Diese Leistung ist kostenfrei. </p>'
|
|
return ant
|
|
|
|
if 'sbw_bearbeitungsdauer' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Die Bearbeitung erfolgt sofort. </p>'
|
|
return ant
|
|
|
|
if 'sbw_fristen' in i['intent']:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>Sie müssen keine Fristen beachten. </p>'
|
|
return ant
|
|
|
|
else:
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + '<p>keine</p>'
|
|
return ant
|
|
else:
|
|
print('hier2!!!')
|
|
ant = '<p>Ich habe folgende Informationen auf dem Portal service-bw für Sie gefunden:</p>' + \
|
|
i['antwort'][
|
|
0]
|
|
return ant
|
|
|
|
return ant
|
|
|
|
results.pop(0)
|
|
else:
|
|
#print('keine sbw_entität und kein kontext!!!!')
|
|
|
|
|
|
|
|
if i['intent'] == 'sbw_organisationseinheiten' and accurancy > float(wert_accurancy):
|
|
ant = "<div class=\"chatbot-card chatbot-card--org\">\n<div class=\"card-header chatbot-card__header\">\n<div class=\"chatbot-card__title\"></div>\n</div>\n<div class=\"card-body chatbot-card__body\">\n<div class=\"organization\">Kfz-Bürgerbüro / Führerschein</div>\n<div class=\"adr\">\n<div class=\"street-address\">Badstraße 20</div>\n<div class=\"cityline\"> <span class=\"locality\">Offenburg</span></div>\n<div class=\"phone\"><span class=\"phone\">Telefon: 0781 805 9495</span></div>\n</div>\n</div>\n</div>"
|
|
return (ant)
|
|
|
|
if i['intent'] == 'sbw_oeffnungszeiten' and accurancy > float(wert_accurancy):
|
|
ant = "<div class=\"chatbot-card chatbot-card--org\">\n<div class=\"card-header chatbot-card__header\">\n<div class=\"chatbot-card__title\">\n<div class=\"orgTitle\">Öffnungszeiten Kfz-Bürgerbüro / Führerschein</div>\n</div>\n</div>\n<div class=\"card-body chatbot-card__body\">\n<div>\n<div class=\"sp-oe-oeffnungszeit-typ\">Allgemeine Sprechzeit</div>\n<div class=\"sp-oe-oeffnungszeit-zeiten\">\n<div><span>Montag <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Dienstag <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Mittwoch <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Donnerstag <span>8 - 12 Uhr und 14 - 16 Uhr</span></span></div>\n<div><span>Freitag <span>8 - 12 Uhr</span></span></div>\n<div><span><span></span></span></div>\n</div>\n</div>\n</div>\n</div>"
|
|
|
|
return (ant)
|
|
|
|
if 'fuhrerschein' in stemmed_frage:
|
|
# print('len(stemmed_frage)', len(stemmed_frage))
|
|
|
|
results = 'lebenslage_fuehrerschein'
|
|
print('resultsstemmed_frage_intent', results)
|
|
for i in dialogflow['dialogflow']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results:
|
|
print('stemmed_frage_intent')
|
|
ant = i['antwort']
|
|
voice = '<p>Hier erfahren Sie</p><ul> <li>wie Sie einen Führerschein erhalten können,</li> <li>welche Nachweise Sie benötigen und</li> <li>was Sie in diesem Zusammenhang noch alles wissen sollten.</li> </ul><p>Wählen Sie bitte die für Sie richtige Leistung</p>'
|
|
|
|
return [ant, voice]
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
if 'fuhrerschein' in stemmed_frage:
|
|
# print('len(stemmed_frage)', len(stemmed_frage))
|
|
|
|
results = 'lebenslage_fuehrerschein'
|
|
print('resultsstemmed_frage_intent', results)
|
|
for i in dialogflow['dialogflow']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results:
|
|
print('stemmed_frage_intent')
|
|
ant = i['antwort']
|
|
voice = '<p>Hier erfahren Sie</p><ul> <li>wie Sie einen Führerschein erhalten können,</li> <li>welche Nachweise Sie benötigen und</li> <li>was Sie in diesem Zusammenhang noch alles wissen sollten.</li> </ul><p>Wählen Sie bitte die für Sie richtige Leistung</p>'
|
|
|
|
return [ant, voice]
|
|
|
|
else:
|
|
|
|
return ('<p>Ich bin ein Prototyp, der Ihnen verschiedene Fragen zum Thema Führerschein beantworten kann.</p><p>Um Ihnen weiterhelfen zu können, benötige ich genauere Informationen. <br> Können Sie bitte Ihre Frage anders formulieren?</p>')
|
|
|
|
|
|
|
|
if 'kontext' in i:
|
|
#####print('11111')
|
|
if show_details: print('context1111:', i['kontext'])
|
|
context[userId] = i['kontext']
|
|
#####print(context)
|
|
|
|
|
|
if i['intent'] == 'adresse_infoorte' and accurancy < float(wert_accurancy):
|
|
|
|
if 'fuhrerschein' in stemmed_frage:
|
|
# print('len(stemmed_frage)', len(stemmed_frage))
|
|
|
|
results = 'lebenslage_fuehrerschein'
|
|
print('resultsstemmed_frage_intent', results)
|
|
for i in dialogflow['dialogflow']:
|
|
# finde ein intent, das dem ersten Ergebnis entspricht
|
|
if i['intent'] == results:
|
|
print('stemmed_frage_intent')
|
|
ant = i['antwort']
|
|
voice = '<p>Hier erfahren Sie</p><ul> <li>wie Sie einen Führerschein erhalten können,</li> <li>welche Nachweise Sie benötigen und</li> <li>was Sie in diesem Zusammenhang noch alles wissen sollten.</li> </ul><p>Wählen Sie bitte die für Sie richtige Leistung</p>'
|
|
|
|
return [ant, voice]
|
|
|
|
else:
|
|
return ('<p>Ich bin ein Prototyp, der Ihnen verschiedene Fragen zum Thema Führerschein beantworten kann.</p><p>Um Ihnen weiterhelfen zu können, benötige ich genauere Informationen. <br> Können Sie bitte Ihre Frage anders formulieren?</p>')
|
|
|
|
|
|
|
|
|
|
if i['intent'] == 'smalltalk_uhrzeit':
|
|
print('Uhrzeit')
|
|
ant = datetime.now().strftime('%H:%M:%S')
|
|
return (ant)
|
|
if i['intent'] == 'smalltalk_datum':
|
|
#####print('Datum')
|
|
ant = date.today().strftime('%d.%m.%Y')
|
|
return (ant)
|
|
|
|
|
|
if not 'kontextpruefung' in i or \
|
|
(userId in context and 'kontextpruefung' in i and i['kontextpruefung'] == context[userId]):
|
|
####print('if not kontextpruefung in i or userId in context and kontextpruefung in i and i[kontextpruefung] == context[userId]')
|
|
#####print(context)
|
|
#####print(i['intent'])
|
|
|
|
|
|
if 'kontextneu' in i:
|
|
#####print('3')
|
|
context[userId] = i['kontextneu']
|
|
if show_details: print('context2:', i['kontextneu'])
|
|
return random.choice(i['antwort'])
|
|
|
|
|
|
|
|
if not 'kontextpruefung' in i:
|
|
|
|
if 'smalltalk_' in i['intent'] and accurancy>float(wert_accurancy):
|
|
####print('smalltalk!')
|
|
return random.choice(i['antwort'])
|
|
|
|
|
|
if accurancy > float(wert_accurancy):
|
|
#####print('accurancy', accurancy)
|
|
#####print('float(wert_accurancy)', float(wert_accurancy))
|
|
####print('2.1')
|
|
#####print(context)
|
|
ant = random.choice(i['antwort'])
|
|
return ant
|
|
|
|
|
|
if accurancy <= float(wert_accurancy):
|
|
|
|
|
|
#####print('2.2')
|
|
#####print(context)
|
|
return ('<p>Ich bin ein Prototyp, der Ihnen verschiedene Fragen zum Thema Führerschein beantworten kann.</p><p>Um Ihnen weiterhelfen zu können, benötige ich genauere Informationen. <br> Können Sie bitte Ihre Frage anders formulieren?</p>')
|
|
|
|
|
|
ant = random.choice(i['antwort'])
|
|
return ant
|
|
|
|
|
|
if not userId in context and 'kontextpruefung' in i:
|
|
####print('if not userId in context and kontextpruefung in i')
|
|
# #####print('kontextpruefung1:', i['kontextpruefung'])
|
|
if show_details: print('intent:', i['intent'])
|
|
# generiere eine zufällige Antwort von dem dialog
|
|
if 'kontextneu' in i:
|
|
#####print('3')
|
|
context[userId] = i['kontextneu']
|
|
if show_details: print('context2:', i['kontextneu'])
|
|
if accurancy <= float(wert_accurancy):
|
|
####print('2.2')
|
|
#####print(context)
|
|
return ('<p>Ich bin ein Prototyp, der Ihnen verschiedene Fragen zum Thema Führerschein beantworten kann.</p><p>Um Ihnen weiterhelfen zu können, benötige ich genauere Informationen. <br> Können Sie bitte Ihre Frage anders formulieren?</p>')
|
|
|
|
|
|
if accurancy > float(wert_accurancy):
|
|
#####print('accurancy', accurancy)
|
|
#####print('float(wert_accurancy)', float(wert_accurancy))
|
|
#####print('2.1')
|
|
#####print(context)
|
|
ant = random.choice(i['antwort'])
|
|
return ant
|
|
|
|
|
|
if userId in context and 'kontextpruefung' in i and i['kontextpruefung'] != context[userId]:
|
|
####print('if userId in context and kontextpruefung in i and i[kontextpruefung] != context[userId]')
|
|
#####print('4')
|
|
# #####print('kontextpruefung1:', i['kontextpruefung'])
|
|
if show_details: print('intent:', i['intent'])
|
|
# generiere eine zufällige Antwort von dem dialog
|
|
if 'kontextneu' in i:
|
|
#####print('3')
|
|
context[userId] = i['kontextneu']
|
|
if show_details: print('context2:', i['kontextneu'])
|
|
if accurancy <= float(wert_accurancy):
|
|
####print('2.2')
|
|
#####print(context)
|
|
return ('<p>Ich bin ein Prototyp, der Ihnen verschiedene Fragen zum Thema Führerschein beantworten kann.</p><p>Um Ihnen weiterhelfen zu können, benötige ich genauere Informationen. <br> Können Sie bitte Ihre Frage anders formulieren?</p>')
|
|
|
|
|
|
if accurancy > float(wert_accurancy):
|
|
#####print('accurancy', accurancy)
|
|
#####print('float(wert_accurancy)', float(wert_accurancy))
|
|
####print('2.1')
|
|
#####print(context)
|
|
ant = random.choice(i['antwort'])
|
|
return ant
|
|
|
|
|
|
results.pop(0)
|
|
|