You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
|
|
|
import re
|
|
|
|
import spacy
|
|
|
|
from utils.voice import sanitize_text
|
|
|
|
|
|
|
|
#working good
|
|
|
|
def posttextparser(obj: str) -> list:
    """Split a post body into the sentences worth keeping.

    Newlines in ``obj`` are replaced by spaces, the text is segmented into
    sentences with the spaCy ``en_core_web_sm`` pipeline, and every sentence
    for which ``sanitize_text`` returns a truthy value is kept.

    Args:
        obj: Raw post text, possibly spanning several lines.

    Returns:
        The retained sentence strings (unsanitized), in document order.

    Raises:
        SystemExit: If the ``en_core_web_sm`` model is not installed.
    """
    # Use a space, not an empty string, so that words on adjacent lines are
    # not fused together ("one.\ntwo" must not become "one.two").
    text = obj.replace("\n", " ")

    try:
        nlp = spacy.load("en_core_web_sm")
    except OSError:
        print(
            "dev:please download the model with this command \n"
            "python -m spacy download en_core_web_sm"
        )
        # The `exit()` builtin comes from `site` and may be missing; raising
        # SystemExit directly always works and reports a failing status.
        raise SystemExit(1)

    doc = nlp(text)

    # sanitize_text() is used only as a filter here: sentences it reduces to
    # a falsy value are dropped, while the original sentence text is what
    # gets returned.
    return [sent.text for sent in doc.sents if sanitize_text(sent.text)]
|