PropertyGraph using Neo4j

In this notebook, we will demonstrate building a PropertyGraph index using Neo4j.

Neo4j is a production-grade graph database that excels in storing property graphs, performing vector searches, filtering, and more.
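As a conceptual sketch (plain Python, not the Neo4j API), a property graph stores labeled nodes and typed relationships, each of which can carry arbitrary key/value properties:

```python
# Conceptual sketch of a property graph (illustrative only, not the Neo4j API):
# nodes and relationships are labeled/typed and carry key/value properties.
nodes = {
    "n1": {"label": "Person", "properties": {"name": "Paul Graham"}},
    "n2": {"label": "Company", "properties": {"name": "Viaweb"}},
}
relationships = [
    {"start": "n1", "end": "n2", "type": "FOUNDED", "properties": {"year": 1995}},
]

def neighbors(node_id, rel_type=None):
    """Return ids of nodes reachable from node_id over (optionally typed) relationships."""
    return [
        r["end"]
        for r in relationships
        if r["start"] == node_id and (rel_type is None or r["type"] == rel_type)
    ]

print(neighbors("n1", "FOUNDED"))  # ['n2']
```

Neo4j adds indexing, Cypher querying, and vector search on top of this basic model, which is what the graph store below relies on.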

The simplest way to begin is by using a cloud-hosted instance through Neo4j Aura. However, for the purposes of this notebook, we will focus on how to run the database locally using Docker.

%pip install llama-index-core 
%pip install llama-index-graph-stores-neo4j
%pip install llama-index-llms-mistralai
%pip install llama-index-embeddings-mistralai

Docker Setup

You need to log in and set a new password the first time you connect. The default credentials are:

  1. username: neo4j

  2. password: neo4j

The rest of this notebook assumes the password has been changed to llamaindex.

!docker run \
    -p 7474:7474 -p 7687:7687 \
    -v $PWD/data:/data -v $PWD/plugins:/plugins \
    --name neo4j-apoc \
    -e NEO4J_apoc_export_file_enabled=true \
    -e NEO4J_apoc_import_file_enabled=true \
    -e NEO4J_apoc_import_file_use__neo4j__config=true \
    -e NEO4JLABS_PLUGINS=\[\"apoc\"\] \
    neo4j:latest

Setup

import nest_asyncio

nest_asyncio.apply()

from IPython.display import Markdown, display
import os
os.environ['MISTRAL_API_KEY'] = 'YOUR MISTRAL API KEY'
from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.llms.mistralai import MistralAI

llm = MistralAI(model='mistral-large-latest')
embed_model = MistralAIEmbedding(model_name="mistral-embed")

Download Data

!mkdir -p 'data/paul_graham/'
!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'

Load Data

from llama_index.core import SimpleDirectoryReader

documents = SimpleDirectoryReader("./data/paul_graham/").load_data()

Index Construction

from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore

# Note: used to be `Neo4jPGStore`
graph_store = Neo4jPropertyGraphStore(
    username="neo4j",
    password="llamaindex",
    url="bolt://localhost:7687",
)
from llama_index.core import PropertyGraphIndex
from llama_index.core.indices.property_graph import SimpleLLMPathExtractor

index = PropertyGraphIndex.from_documents(
    documents,
    embed_model=embed_model,
    kg_extractors=[
        SimpleLLMPathExtractor(llm=llm),
    ],
    property_graph_store=graph_store,
    show_progress=True,
)
from llama_index.core import Settings
Settings.llm = llm
Settings.embed_model = embed_model
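SimpleLLMPathExtractor prompts the LLM to emit (subject, predicate, object) knowledge triples, which become nodes and relationships in the graph. The exact prompt and output format belong to LlamaIndex; the parser below is a hypothetical sketch of how such triple-formatted output could be turned into tuples:

```python
import re

# Illustrative only: parse "(subject, predicate, object)" lines such as an LLM
# might emit in response to a knowledge-triple extraction prompt.
def parse_triples(text):
    triples = []
    for match in re.finditer(r"\(([^,()]+),([^,()]+),([^,()]+)\)", text):
        subj, pred, obj = (part.strip() for part in match.groups())
        triples.append((subj, pred, obj))
    return triples

sample = """(Paul Graham, founded, Viaweb)
(Viaweb, acquired by, Yahoo)"""
print(parse_triples(sample))
# [('Paul Graham', 'founded', 'Viaweb'), ('Viaweb', 'acquired by', 'Yahoo')]
```

Each extracted triple is stored in Neo4j along with the source text chunk it came from, which is what makes retrieval over the graph possible later.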

Retrievers

from llama_index.core.indices.property_graph import (
    LLMSynonymRetriever,
    VectorContextRetriever,
)


llm_synonym = LLMSynonymRetriever(
    index.property_graph_store,
    llm=llm,
    include_text=False,
)
vector_context = VectorContextRetriever(
    index.property_graph_store,
    embed_model=embed_model,
    include_text=False,
)
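VectorContextRetriever finds graph nodes whose embeddings are closest to the query embedding. The similarity measure typically used for this is cosine similarity, sketched here in plain Python for illustration (the actual computation happens inside the vector index, and the vectors below are made up):

```python
import math

def cosine_similarity(a, b):
    """Cosine similarity between two equal-length vectors."""
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    return dot / (norm_a * norm_b)

# Toy example: rank two node embeddings against a query embedding.
query_vec = [1.0, 0.0]
node_vecs = {"viaweb": [0.9, 0.1], "lisp": [0.2, 0.8]}
best = max(node_vecs, key=lambda k: cosine_similarity(query_vec, node_vecs[k]))
print(best)  # viaweb
```

The LLMSynonymRetriever complements this by asking the LLM for synonyms and related keywords of the query, then matching those against node names in the graph.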

Querying

Retrieving

retriever = index.as_retriever(
    sub_retrievers=[
        llm_synonym,
        vector_context,
    ],
)

nodes = retriever.retrieve("What did the author do at Viaweb?")

for node in nodes:
    print(node.text)

QueryEngine

query_engine = index.as_query_engine(include_text=True)

response = query_engine.query("What did the author do at Viaweb?")

display(Markdown(f"{response.response}"))