Time stuff and switch to gensim for tokenisation

This commit is contained in:
2022-10-01 14:46:45 +01:00
parent 40cf0c6430
commit 817bfd8835
4 changed files with 100 additions and 41 deletions

View File

@@ -16,7 +16,8 @@ COPY requirements.txt /code/
COPY discord-patched.tgz /code/
RUN python -m venv /venv
RUN . /venv/bin/activate && pip install -r requirements.txt && python -m spacy download en_core_web_sm
RUN . /venv/bin/activate && pip install -r requirements.txt
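# spaCy model download disabled: spacy is commented out in requirements.txt
# and tokenisation now uses gensim. Previously: && python -m spacy download en_core_web_sm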
RUN tar xf /code/discord-patched.tgz -C /venv/lib/python3.10/site-packages

View File

@@ -15,7 +15,8 @@ pycld2
morfessor
six
nltk
spacy
#spacy
gensim
python-Levenshtein
orjson
uvloop