mirror of
https://github.com/camel-ai/owl.git
synced 2026-03-22 05:57:17 +08:00
279 lines
9.5 KiB
Python
279 lines
9.5 KiB
Python
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
from typing import TYPE_CHECKING, Optional, Union
|
|
|
|
if TYPE_CHECKING:
|
|
from unstructured.documents.elements import Element
|
|
|
|
from camel.agents import ChatAgent
|
|
from camel.messages import BaseMessage
|
|
from camel.models import BaseModelBackend
|
|
from camel.prompts import TextPrompt
|
|
from camel.storages.graph_storages.graph_element import (
|
|
GraphElement,
|
|
Node,
|
|
Relationship,
|
|
)
|
|
from camel.types import RoleType
|
|
|
|
# AgentOps decorator setting
|
|
try:
|
|
import os
|
|
|
|
if os.getenv("AGENTOPS_API_KEY") is not None:
|
|
from agentops import track_agent
|
|
else:
|
|
raise ImportError
|
|
except (ImportError, AttributeError):
|
|
from camel.utils import track_agent
|
|
|
|
|
|
text_prompt = """
|
|
You are tasked with extracting nodes and relationships from given content and
|
|
structures them into Node and Relationship objects. Here's the outline of what
|
|
you needs to do:
|
|
|
|
Content Extraction:
|
|
You should be able to process input content and identify entities mentioned
|
|
within it.
|
|
Entities can be any noun phrases or concepts that represent distinct entities
|
|
in the context of the given content.
|
|
|
|
Node Extraction:
|
|
For each identified entity, you should create a Node object.
|
|
Each Node object should have a unique identifier (id) and a type (type).
|
|
Additional properties associated with the node can also be extracted and
|
|
stored.
|
|
|
|
Relationship Extraction:
|
|
You should identify relationships between entities mentioned in the content.
|
|
For each relationship, create a Relationship object.
|
|
A Relationship object should have a subject (subj) and an object (obj) which
|
|
are Node objects representing the entities involved in the relationship.
|
|
Each relationship should also have a type (type), and additional properties if
|
|
applicable.
|
|
|
|
Output Formatting:
|
|
The extracted nodes and relationships should be formatted as instances of the
|
|
provided Node and Relationship classes.
|
|
Ensure that the extracted data adheres to the structure defined by the classes.
|
|
Output the structured data in a format that can be easily validated against
|
|
the provided code.
|
|
Do not wrap the output in lists or dictionaries, provide the Node and
|
|
Relationship with unique identifiers.
|
|
Strictly follow the format provided in the example output, do not add any
|
|
additional information.
|
|
|
|
|
|
Instructions for you:
|
|
Read the provided content thoroughly.
|
|
Identify distinct entities mentioned in the content and categorize them as
|
|
nodes.
|
|
Determine relationships between these entities and represent them as directed
|
|
relationships.
|
|
Provide the extracted nodes and relationships in the specified format below.
|
|
Example for you:
|
|
|
|
Example Content:
|
|
"John works at XYZ Corporation. He is a software engineer. The company is
|
|
located in New York City."
|
|
|
|
Expected Output:
|
|
|
|
Nodes:
|
|
|
|
Node(id='John', type='Person')
|
|
Node(id='XYZ Corporation', type='Organization')
|
|
Node(id='New York City', type='Location')
|
|
|
|
Relationships:
|
|
|
|
Relationship(subj=Node(id='John', type='Person'), obj=Node(id='XYZ
|
|
Corporation', type='Organization'), type='WorksAt')
|
|
Relationship(subj=Node(id='John', type='Person'), obj=Node(id='New York City',
|
|
type='Location'), type='ResidesIn')
|
|
|
|
===== TASK =====
|
|
Please extracts nodes and relationships from given content and structures them
|
|
into Node and Relationship objects.
|
|
|
|
{task}
|
|
"""
|
|
|
|
|
|
@track_agent(name="KnowledgeGraphAgent")
|
|
class KnowledgeGraphAgent(ChatAgent):
|
|
r"""An agent that can extract node and relationship information for
|
|
different entities from given `Element` content.
|
|
|
|
Attributes:
|
|
task_prompt (TextPrompt): A prompt for the agent to extract node and
|
|
relationship information for different entities.
|
|
"""
|
|
|
|
def __init__(
|
|
self,
|
|
model: Optional[BaseModelBackend] = None,
|
|
) -> None:
|
|
r"""Initialize the `KnowledgeGraphAgent`.
|
|
|
|
Args:
|
|
model (BaseModelBackend, optional): The model backend to use for
|
|
generating responses. (default: :obj:`OpenAIModel` with
|
|
`GPT_4O_MINI`)
|
|
"""
|
|
system_message = BaseMessage(
|
|
role_name="Graphify",
|
|
role_type=RoleType.ASSISTANT,
|
|
meta_dict=None,
|
|
content="Your mission is to transform unstructured content "
|
|
"into structured graph data. Extract nodes and relationships with "
|
|
"precision, and let the connections unfold. Your graphs will "
|
|
"illuminate the hidden connections within the chaos of "
|
|
"information.",
|
|
)
|
|
super().__init__(system_message, model=model)
|
|
|
|
def run(
|
|
self,
|
|
element: "Element",
|
|
parse_graph_elements: bool = False,
|
|
prompt: Optional[str] = None,
|
|
) -> Union[str, GraphElement]:
|
|
r"""Run the agent to extract node and relationship information.
|
|
|
|
Args:
|
|
element (Element): The input element.
|
|
parse_graph_elements (bool, optional): Whether to parse into
|
|
`GraphElement`. Defaults to `False`.
|
|
prompt (str, optional): The custom prompt to be used.
|
|
Defaults to `None`.
|
|
|
|
Returns:
|
|
Union[str, GraphElement]: The extracted node and relationship
|
|
information. If `parse_graph_elements` is `True` then return
|
|
`GraphElement`, else return `str`.
|
|
"""
|
|
self.reset()
|
|
self.element = element
|
|
|
|
# Use the provided prompt or fall back to the default text_prompt
|
|
final_prompt = prompt if prompt is not None else text_prompt
|
|
|
|
knowledge_graph_prompt = TextPrompt(final_prompt)
|
|
knowledge_graph_generation = knowledge_graph_prompt.format(
|
|
task=str(element)
|
|
)
|
|
|
|
response = self.step(input_message=knowledge_graph_generation)
|
|
|
|
content = response.msg.content
|
|
|
|
if parse_graph_elements:
|
|
content = self._parse_graph_elements(content)
|
|
|
|
return content
|
|
|
|
def _validate_node(self, node: Node) -> bool:
|
|
r"""Validate if the object is a valid Node.
|
|
|
|
Args:
|
|
node (Node): Object to be validated.
|
|
|
|
Returns:
|
|
bool: True if the object is a valid Node, False otherwise.
|
|
"""
|
|
return (
|
|
isinstance(node, Node)
|
|
and isinstance(node.id, (str, int))
|
|
and isinstance(node.type, str)
|
|
)
|
|
|
|
def _validate_relationship(self, relationship: Relationship) -> bool:
|
|
r"""Validate if the object is a valid Relationship.
|
|
|
|
Args:
|
|
relationship (Relationship): Object to be validated.
|
|
|
|
Returns:
|
|
bool: True if the object is a valid Relationship, False otherwise.
|
|
"""
|
|
return (
|
|
isinstance(relationship, Relationship)
|
|
and self._validate_node(relationship.subj)
|
|
and self._validate_node(relationship.obj)
|
|
and isinstance(relationship.type, str)
|
|
)
|
|
|
|
def _parse_graph_elements(self, input_string: str) -> GraphElement:
|
|
r"""Parses graph elements from given content.
|
|
|
|
Args:
|
|
input_string (str): The input content.
|
|
|
|
Returns:
|
|
GraphElement: The parsed graph elements.
|
|
"""
|
|
import re
|
|
|
|
# Regular expressions to extract nodes and relationships
|
|
node_pattern = r"Node\(id='(.*?)', type='(.*?)'\)"
|
|
rel_pattern = (
|
|
r"Relationship\(subj=Node\(id='(.*?)', type='(.*?)'\), "
|
|
r"obj=Node\(id='(.*?)', type='(.*?)'\), "
|
|
r"type='(.*?)'(?:, timestamp='(.*?)')?\)"
|
|
)
|
|
|
|
nodes = {}
|
|
relationships = []
|
|
|
|
# Extract nodes
|
|
for match in re.finditer(node_pattern, input_string):
|
|
id, type = match.groups()
|
|
properties = {'source': 'agent_created'}
|
|
if id not in nodes:
|
|
node = Node(id=id, type=type, properties=properties)
|
|
if self._validate_node(node):
|
|
nodes[id] = node
|
|
|
|
# Extract relationships
|
|
for match in re.finditer(rel_pattern, input_string):
|
|
groups = match.groups()
|
|
if len(groups) == 6:
|
|
subj_id, subj_type, obj_id, obj_type, rel_type, timestamp = (
|
|
groups
|
|
)
|
|
else:
|
|
subj_id, subj_type, obj_id, obj_type, rel_type = groups
|
|
timestamp = None
|
|
properties = {'source': 'agent_created'}
|
|
if subj_id in nodes and obj_id in nodes:
|
|
subj = nodes[subj_id]
|
|
obj = nodes[obj_id]
|
|
relationship = Relationship(
|
|
subj=subj,
|
|
obj=obj,
|
|
type=rel_type,
|
|
timestamp=timestamp,
|
|
properties=properties,
|
|
)
|
|
if self._validate_relationship(relationship):
|
|
relationships.append(relationship)
|
|
|
|
return GraphElement(
|
|
nodes=list(nodes.values()),
|
|
relationships=relationships,
|
|
source=self.element,
|
|
)
|