RAG on Neo4j database

RAGNeo4j

image/png

Requirements and disclaimers

  • To run this notebook, you will need to download Neo4j Desktop or have access to a Neo4j database in the cloud.
  • If your Neo4j database is running on localhost, we recommend running this cookbook on your local machine rather than in Google Colab.
! pip install mistralai neo4j
import json
from neo4j import GraphDatabase
from mistralai import Mistral
from getpass import getpass
# Credentials and connection details are collected interactively so that no
# secret is stored in the notebook itself.
api_key = getpass("Type your API Key")
neo4j_password = getpass("Type your neo4j password")
neo4j_user = getpass("Type your neo4j user name")
neo4j_uri = getpass("Type your neo4j url")

# Name of the Mistral model passed to `client.chat.complete(model=model, ...)`
# by the agent functions. It has to be bound at module level: without it
# those calls fail with a NameError.
model = "mistral-large-latest"
client = Mistral(api_key=api_key)

Step 1: Download Neo4j Desktop and check Python client access

  • Install Neo4j Desktop

  • Open the default project, "Example Project", which contains the Movie DBMS database

image/png

# Connection settings read by run_cypher_query: the (user, password) pair
# used for authentication and the address of the Neo4j instance.
AUTH = (neo4j_user, neo4j_password)
URI = neo4j_uri

def run_cypher_query(cypher_query):
    """Run a Cypher statement on the ``neo4j`` database and return its records.

    A driver is created from the module-level ``URI`` and ``AUTH`` for this
    single call and is closed again when the ``with`` block exits.

    Args:
        cypher_query: The Cypher text to execute.

    Returns:
        The records element of the result returned by ``execute_query``.
    """
    neo4j_driver = GraphDatabase.driver(URI, auth=AUTH)
    with neo4j_driver:
        outcome = neo4j_driver.execute_query(cypher_query, database_="neo4j")
    # execute_query yields a (records, summary, keys) triple; only the
    # records are of interest here.
    rows, _summary, _keys = outcome
    return rows

Step 2: Create a text-to-Cypher agent for the Movie DBMS database

image/png

def generate_cypher_query(question):
    """Ask the chat model to translate a natural-language question into Cypher.

    The prompt describes the movie graph schema, shows three worked examples
    and then appends ``question``. The request uses the JSON-object response
    format, and the examples place the query under the ``"result"`` key.

    Args:
        question: The user's question in natural language.

    Returns:
        The message content of the first completion choice, i.e. the raw
        text produced by the model (expected to be a JSON document).
    """
    instructions = f"""You are a coding agent interacting with a Neo4j database with the following schema : 

    - Labels : "Movie", "Person"
    - Relationships : "ACTED_IN", "DIRECTED", "FOLLOWS", "PRODUCED", "REVIEWED", "WROTE"

    "Person" label has the following properties : 
    - born
    - name 

    "Movie" label has the following properties :
    - title
    - released

    Your will be given as input a query in natural language and your role is to output a cypher query whose output will contain the answer.
    Your output with be in a json format. 

    Examples : 

    input : When was the movie "The Matrix" released ?
    output : {{"result": "MATCH (n:Movie) WHERE n.title='The Matrix' RETURN n.released"}}

    input : In which movied Tom Hanks played ?
    output : {{"result": "MATCH (p:Person {{name: 'Tom Hanks'}})-[:ACTED_IN]->(m:Movie) RETURN m.title AS movieTitle"}}

    input : What movie Steven Spielber produced ?
    output : {{"result": "MATCH (p:Person {{name: 'Steven Spielberg'}})-[:PRODUCED]->(m:Movie) RETURN m.title AS movieTitle"}}

    Here is the user question :
    {question}
    """

    # The whole prompt is sent as a single user turn.
    user_message = {"role": "user", "content": instructions}
    completion = client.chat.complete(
        model=model,
        response_format={"type": "json_object"},
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

Step 3 : Create a response agent

def respond_to_query(question, cypher_code, query_output):
    """Ask the chat model to answer the question from the query and its output.

    The prompt repeats the graph schema and then embeds the original
    question, the Cypher that was executed and the output it produced.

    Args:
        question: The user's question in natural language.
        cypher_code: The Cypher query that was run to answer it.
        query_output: The result of that query; it is interpolated into the
            prompt through f-string formatting.

    Returns:
        The message content of the first completion choice.
    """
    instructions = f"""You are a coding agent interacting with a Neo4j database with the following schema : 

    - Labels : "Movie", "Person"
    - Relationships : "ACTED_IN", "DIRECTED", "FOLLOWS", "PRODUCED", "REVIEWED", "WROTE"

    "Person" label has the following properties : 
    - born
    - name 

    "Movie" label has the following properties :
    - title
    - released

    The user asked the following question :
    {question}

    To answee the question the following cypher query was run on Neo4j : 
    {cypher_code}

    The following output was obtained :
    {query_output}

    Based on all these elements answer the initial user question. 
    Be straight to the point and concise in your answers.

    Your answer:
    """

    # The whole prompt is sent as a single user turn; no response format is
    # requested here, so the reply is free text.
    user_message = {"role": "user", "content": instructions}
    completion = client.chat.complete(
        model=model,
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

Step 4: Test the end-to-end workflow

def neo4j_agent(question):
    """Run the full question -> Cypher -> database -> answer pipeline and print it.

    The generated JSON is parsed and its ``'result'`` entry is taken as the
    Cypher query, which is executed; the records are then handed to the
    response agent. The question, the query and the final response are
    printed; nothing is returned.

    Args:
        question: The user's question in natural language.
    """
    generated_json = generate_cypher_query(question)
    cypher_code = json.loads(generated_json)['result']

    records = run_cypher_query(cypher_code)
    answer = respond_to_query(question, cypher_code, records)

    print(f'Question : \n {question} \n')
    print(f'Query : \n {cypher_code} \n')
    print(f'Response : \n {answer} \n')
 
# Sample questions run through the whole pipeline, one after another.
demo_questions = (
    "When was Keanue Reeves born ?",
    "What actors played in the movie The Matrix ?",
    "Tell me the name of a person that is both an actor and a producer on another movie",
    "List Tom Hanks movies and sort them by release date",
    "We are in 2024, how old is Tom Hanks ?",
    "Give me names of two actors that played together in two differnet films. Give me the names of the associated movies",
)
for demo_question in demo_questions:
    neo4j_agent(demo_question)