diff --git a/libs/core/langchain_core/graph_vectorstores/links.py b/libs/core/langchain_core/graph_vectorstores/links.py index 7464eb4aa956b..2a8c96011d69a 100644 --- a/libs/core/langchain_core/graph_vectorstores/links.py +++ b/libs/core/langchain_core/graph_vectorstores/links.py @@ -8,10 +8,112 @@ @beta() @dataclass(frozen=True) class Link: - """A link to/from a tag of a given tag. + """A link to/from a tag of a given kind. - Edges exist from nodes with an outgoing link to nodes with a matching incoming link. - """ + Documents in a :class:`graph vector store ` + are connected via "links". + Links form a bipartite graph between documents and tags: documents are connected + to tags, and tags are connected to other documents. + When documents are retrieved from a graph vector store, a pair of documents are + connected with a depth of one if both documents are connected to the same tag. + + Links have a ``kind`` property, used to namespace different tag identifiers. + For example a link to a keyword might use kind ``kw``, while a link to a URL might + use kind ``url``. + This allows the same tag value to be used in different contexts without causing + name collisions. + + Links are directed. The directionality of links controls how the graph is + traversed at retrieval time. + For example, given documents ``A`` and ``B``, connected by links to tag ``T``: + + +----------+----------+---------------------------------+ + | A to T | B to T | Result | + +==========+==========+=================================+ + | outgoing | incoming | Retrieval traverses from A to B | + +----------+----------+---------------------------------+ + | incoming | incoming | No traversal from A to B | + +----------+----------+---------------------------------+ + | outgoing | incoming | No traversal from A to B | + +----------+----------+---------------------------------+ + | bidir | incoming | Retrieval traverses from A to B | + +----------+----------+---------------------------------+ + | bidir | outgoing | No traversal from A to B | + +----------+----------+---------------------------------+ + | outgoing | bidir | Retrieval traverses from A to B | + +----------+----------+---------------------------------+ + | incoming | bidir | No traversal from A to B | + +----------+----------+---------------------------------+ + + Directed links make it possible to describe relationships such as term + references / definitions: term definitions are generally relevant to any documents + that use the term, but the full set of documents using a term generally aren't + relevant to the term's definition. + + .. seealso:: + - :mod:`How to use a graph vector store ` + - :class:`How to link Documents on hyperlinks in HTML ` + - :class:`How to link Documents on common keywords (using KeyBERT) ` + - :class:`How to link Documents on common named entities (using GliNER) ` + + How to add links to a Document + ============================== + + How to create links + ------------------- + + You can create links using the Link class's constructors :meth:`incoming`, + :meth:`outgoing`, and :meth:`bidir`:: + + from langchain_core.graph_vectorstores.links import Link + + print(Link.bidir(kind="location", tag="Paris")) + + .. code-block:: output + + Link(kind='location', direction='bidir', tag='Paris') + + Extending documents with links + ------------------------------ + + Now that we know how to create links, let's associate them with some documents. + These edges will strengthen the connection between documents that share a keyword + when using a graph vector store to retrieve documents. + + First, we'll load some text and chunk it into smaller pieces. + Then we'll add a link to each document to link them all together:: + + from langchain_community.document_loaders import TextLoader + from langchain_core.graph_vectorstores.links import add_links + from langchain_text_splitters import CharacterTextSplitter + + loader = TextLoader("state_of_the_union.txt") + + raw_documents = loader.load() + text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0) + documents = text_splitter.split_documents(raw_documents) + + for doc in documents: + add_links(doc, Link.bidir(kind="genre", tag="oratory")) + + print(documents[0].metadata) + + .. code-block:: output + + {'source': 'state_of_the_union.txt', 'links': [Link(kind='genre', direction='bidir', tag='oratory')]} + + As we can see, each document's metadata now includes a bidirectional link to the + genre ``oratory``. + + The documents can then be added to a graph vector store:: + + from langchain_community.graph_vectorstores import CassandraGraphVectorStore + + graph_vectorstore = CassandraGraphVectorStore.from_documents( + documents=documents, embeddings=... + ) + + """ # noqa: E501 kind: str """The kind of link. Allows different extractors to use the same tag name without @@ -23,17 +125,32 @@ class Link: @staticmethod def incoming(kind: str, tag: str) -> "Link": - """Create an incoming link.""" + """Create an incoming link. + + Args: + kind: the link kind. + tag: the link tag. + """ return Link(kind=kind, direction="in", tag=tag) @staticmethod def outgoing(kind: str, tag: str) -> "Link": - """Create an outgoing link.""" + """Create an outgoing link. + + Args: + kind: the link kind. + tag: the link tag. + """ return Link(kind=kind, direction="out", tag=tag) @staticmethod def bidir(kind: str, tag: str) -> "Link": - """Create a bidirectional link.""" + """Create a bidirectional link. + + Args: + kind: the link kind. + tag: the link tag. + """ return Link(kind=kind, direction="bidir", tag=tag)