After playing with APIs for voice assistants such as Alexa and Dialogflow, I wanted to explore how such an API could work under the hood. This notebook takes a string as input and determines the user's intent using the nearest neighbours of document vectors.
This is a very limited proof of concept. There is a lot more to doing this well. However, it's amazing how much can be done using off the shelf models with spaCy.
One impressive feat is that it can take the string "Is it raining?" and determine that you're asking about the weather. Unfortunately, done this way, it cannot answer yes or no to whether it is raining, because that question has been grouped into the weather
intent. This solution requires coding a new intent for everything you want the assistant to do (which is how the current assistants seem to work).
import spacy
import os
import json
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
%%time
# Load the spaCy pipeline once; every function below reuses this `nlp` object
# for both document vectors and named entities.
nlp = spacy.load("en_core_web_lg")

# Quick look at the named entities recognised in a sample question.
doc = nlp("what time is it in Toronto?")
for entity in doc.ents:
    print(f'{entity} is a {entity.label_}')
# Training examples as (intent, example string) pairs. Keeping each pair
# together means the two parallel lists derived below cannot drift out of
# alignment when examples are added or removed.
_INTENT_EXAMPLES = [
    ('time', 'time in place'),
    ('time', 'current time'),
    ('time', 'what is the time?'),
    ('time', 'what is the time in place?'),
    ('weather', 'what is the weather in place?'),
    ('weather', 'what is the weather?'),
    ('weather', 'current weather'),
]

# intents[i] is the intent label for intent_strings[i].
intents = [label for label, _ in _INTENT_EXAMPLES]
intent_strings = [example for _, example in _INTENT_EXAMPLES]
# Embed every example string as a spaCy document vector. The order matches
# `intent_strings`, so a neighbour index returned by the search can be used
# to look up both the example string and its intent.
vectors = [nlp(example).vector for example in intent_strings]

# Brute-force nearest-neighbour index over the example vectors, using cosine
# distance (smaller means more similar).
neigh = NearestNeighbors(
    n_neighbors=5,
    algorithm='brute',
    metric='cosine',
    radius=0.1,
)
neigh.fit(vectors)
# Maximum cosine distance at which the nearest example string still counts as
# a match for its intent (compared with `score <= THRESHOLD`).
#
# This value was chosen by hand and is a weak part of the system. It could be
# derived automatically from the corpus of intent strings instead:
#   1. for each intent string, query the index and take the distance to the
#      nearest example that belongs to a *different* intent
#      (e.g. for `what is the weather?`, the first result that is not `weather`);
#   2. take the minimum of those distances over all intent strings.
THRESHOLD = 0.2
def list_possible_intents(input: str, debug: bool = False, n_neighbors: int = 5):
    """Rank the known intents by how close their example strings are to ``input``.

    The text is embedded with ``nlp`` and looked up in the fitted
    nearest-neighbour index ``neigh``.

    Args:
        input: The raw user utterance.
        debug: When true, print the distance, intent and matched example
            string of every candidate.
        n_neighbors: Maximum number of candidates to return. The value is
            capped at the number of example strings, because ``kneighbors``
            raises ``ValueError`` when asked for more neighbours than there
            are fitted samples.

    Returns:
        A list of ``(intent, distance, passes_threshold)`` tuples, nearest
        candidate first, where ``passes_threshold`` is
        ``distance <= THRESHOLD``.
    """
    query_vector = nlp(input).vector

    # Previously a hard-coded 5, which fails on a corpus with fewer than
    # five example strings.
    k = min(n_neighbors, len(intent_strings))
    scores, indices = neigh.kneighbors(
        [query_vector], n_neighbors=k, return_distance=True
    )

    possible_intents = []
    # A single query was submitted, so only row 0 of each result is relevant.
    for score, index in zip(scores[0], indices[0]):
        if debug:
            print(f'{score:.3f}: {intents[index]}\t\tString {intent_strings[index]}')
        possible_intents.append((intents[index], score, score <= THRESHOLD))
    return possible_intents
def find_intent(input: str, debug: bool = False):
    """Return the best-matching intent for ``input``, or ``None``.

    Only the first candidate from ``list_possible_intents`` is considered;
    it is accepted only if it passed the distance threshold.

    Args:
        input: The raw user utterance.
        debug: Forwarded to ``list_possible_intents`` to print the candidates.

    Returns:
        The intent name of the first candidate when it passes the threshold,
        otherwise ``None``.
    """
    candidates = list_possible_intents(input, debug)
    if not candidates:
        return None

    best_intent, _distance, is_confident = candidates[0]
    return best_intent if is_confident else None
# Demo: debug=True also prints each candidate's distance, intent and matched
# example string before the chosen intent is returned.
find_intent('what is the time in Toronto?', debug=True)
def find_entities(input: str):
    """Extract the named entities spaCy finds in ``input``.

    Args:
        input: The raw user utterance.

    Returns:
        A dict mapping each entity label to the entity text. If several
        entities share a label, the last one in the text wins.
    """
    return {entity.label_: str(entity) for entity in nlp(input).ents}
# Demo: entity extraction for a question that names a place ...
find_entities('what is the time in Toronto?')
# ... and for the same question without one.
find_entities('what is the time?')
def query(input: str):
    """Resolve ``input`` to an intent and print what would be returned.

    Prints the matched intent together with the ``GPE`` entity found in the
    text (``DEFAULT`` when there is none), or a fallback message when no
    intent passes the threshold.

    Args:
        input: The raw user utterance.
    """
    intent = find_intent(input)
    if not intent:
        print(f'I am not sure what you mean by: `{input}`')
        return

    # Only the GPE entity is used; fall back to a placeholder when the text
    # does not contain one.
    gpe = find_entities(input).get('GPE', 'DEFAULT')
    print(f'Return `{intent}` for GPE: `{gpe}`')
# Time question without a place name
query('what time is it?')
# Time question with a place, worded differently from the example strings
query('what is the time for Toronto?')
# manages to determine that 'is it raining' is related to weather
query('is it raining?')
# Unknown request: none of the example strings cover this, so it is meant to
# exercise the "not sure" branch
query('call Bill')
# Limitation: only the single nearest intent is returned, so a request that
# combines two intents (time and weather) cannot yield both :(
query('what is the time and weather in Toronto?')