Files
owl/community_usecase/stock-analysis/tools/sec_tools.py
ptonlix cc771cdf15 feat: Add stock analysis use cases, including SEC tools, agents, and sample reports
This submission introduces stock analysis use cases, which include the following main components:
1. Add an SEC toolset (`sec_tools.py`) for retrieving and analyzing 10-K and 10-Q filings from the SEC database.
2. Add an SEC agent (`sec_agent.py`) to generate a comprehensive analysis report of the company's SEC documents.
3. Provide sample reports (Alibaba_investment.analysis.md and Google_investment.analysis.md) to demonstrate complete stock investment analysis.
4. Add an environment variable template (`.env.template`) and a `.gitignore` file to keep the project configuration secure.
5. Add the `run.py` script to run the stock analysis agent and generate reports.
These changes provide a complete solution for stock investment analysis, supporting the entire process from data acquisition to report generation.

feat:  Add stock analysis agent and related documents and example files

This submission includes the implementation code of the stock analysis agent, Chinese and English README documents, example files (including Apple's investment analysis report and chat records), and required dependencies for the project. These changes provide a complete stock analysis tool for the project, helping users generate detailed stock analysis reports.

chore: Delete unused .gitkeep files

Clean up .gitkeep files that are no longer needed in the project to keep the codebase clean.
2025-03-30 23:06:51 +08:00

194 lines
7.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from typing import List
from typing import Optional
from camel.toolkits.base import BaseToolkit
from camel.toolkits.function_tool import FunctionTool
from camel.utils import api_keys_required, dependencies_required
import requests
import html2text
import re
class SECToolkit(BaseToolkit):
    r"""A class representing a toolkit for SEC filings analysis.

    This toolkit provides functionality to:
    - Fetch the latest 10-K (annual) report for a stock ticker
    - Fetch the latest 10-Q (quarterly) report for a stock ticker
    - Convert the filing from HTML to text and strip special characters

    Filing metadata is looked up through the ``sec_api`` package, which
    needs the ``SEC_API_API_KEY`` environment variable. The filing document
    itself is then downloaded over HTTP from the URL that lookup returns.
    """

    # Headers sent with the filing download.
    # NOTE(review): the User-Agent identifies a third party (crewai.com);
    # consider replacing it with your own contact details.
    _REQUEST_HEADERS = {
        "User-Agent": "crewai.com bisan@crewai.com",
        "Accept-Encoding": "gzip, deflate",
        "Host": "www.sec.gov",
    }

    # Seconds to wait for the filing download. Without a timeout,
    # ``requests.get`` can block forever on an unresponsive server.
    _REQUEST_TIMEOUT = 30

    # Matches every character that is NOT an ASCII letter, a digit, a dollar
    # sign or whitespace (``\s`` already includes newlines).
    _DISALLOWED_CHARS = re.compile(r"[^a-zA-Z$0-9\s]")

    def _fetch_latest_filing(
        self, stock_name: str, form_type: str
    ) -> Optional[str]:
        r"""Downloads and cleans the most recent filing of one form type.

        Shared implementation behind :meth:`fetch_10k_filing` and
        :meth:`fetch_10q_filing`.

        Args:
            stock_name (str): The stock ticker symbol (e.g., 'AAPL').
            form_type (str): The SEC form type to search for, e.g. '10-K'
                or '10-Q'.

        Returns:
            Optional[str]: The filing converted from HTML to text with all
                characters other than ASCII letters, digits, dollar signs
                and whitespace removed. Returns None when the ticker is
                blank, when no filing is found, or when any error occurs
                (the error is printed, not raised).
        """
        # Imported lazily: ``sec_api`` is an optional dependency that the
        # public methods guard with ``dependencies_required``.
        import os

        from sec_api import QueryApi

        ticker = stock_name.strip() if isinstance(stock_name, str) else ""
        if not ticker:
            # A blank ticker would produce a malformed query string, so
            # bail out before spending an API call on it.
            print("No stock symbol provided.")
            return None

        try:
            query_api = QueryApi(api_key=os.environ['SEC_API_API_KEY'])
            query = {
                "query": {
                    "query_string": {
                        "query": f"ticker:{ticker} AND formType:\"{form_type}\""
                    }
                },
                # Only the single most recently filed document is needed.
                "from": "0",
                "size": "1",
                "sort": [{"filedAt": {"order": "desc"}}],
            }
            result = query_api.get_filings(query)
            filings = (
                result['filings'] if result and 'filings' in result else []
            )
            if not filings:
                print("No filings found for this stock.")
                return None

            url = filings[0]['linkToFilingDetails']
            response = requests.get(
                url,
                headers=self._REQUEST_HEADERS,
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()  # Raise an exception for HTTP errors

            converter = html2text.HTML2Text()
            converter.ignore_links = False
            # Undecodable bytes become U+FFFD, which the cleaning step below
            # removes, so one bad byte no longer discards the whole filing.
            html = response.content.decode("utf-8", errors="replace")
            text = converter.handle(html)
            return self._DISALLOWED_CHARS.sub("", text)
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error occurred: {e}")
            return None
        except Exception as e:
            # Best-effort tool: report the failure and return None rather
            # than propagating the error to the caller.
            print(f"Error fetching {form_type} URL: {e}")
            return None

    @dependencies_required("sec_api")
    @api_keys_required(
        [
            (None, "SEC_API_API_KEY"),
        ]
    )
    def fetch_10k_filing(self, stock_name: str) -> Optional[str]:
        r"""Fetches and processes the latest 10-K form content for a given
        stock symbol.

        This function looks up the most recent 10-K filing for the provided
        stock ticker symbol, downloads the filing content, converts it from
        HTML to text format, and performs text cleaning.

        Args:
            stock_name (str): The stock ticker symbol (e.g., 'AAPL' for
                Apple Inc.).

        Returns:
            Optional[str]: A cleaned text version of the 10-K filing content.
                Returns None in the following cases:
                - The stock symbol is blank
                - No filings found for the given stock symbol
                - HTTP errors during content retrieval
                - Other exceptions during processing

                The returned text is preprocessed to:
                - Remove HTML formatting
                - Remove special characters
                - Retain only alphanumeric characters, dollar signs,
                  spaces and newlines
        """
        return self._fetch_latest_filing(stock_name, "10-K")

    @dependencies_required("sec_api")
    @api_keys_required(
        [
            (None, "SEC_API_API_KEY"),
        ]
    )
    def fetch_10q_filing(self, stock_name: str) -> Optional[str]:
        r"""Fetches and processes the latest 10-Q form content for a given
        stock symbol.

        This function looks up the most recent 10-Q filing for the provided
        stock ticker symbol, downloads the filing content, converts it from
        HTML to text format, and performs text cleaning.

        Args:
            stock_name (str): The stock ticker symbol (e.g., 'AAPL' for
                Apple Inc.).

        Returns:
            Optional[str]: A cleaned text version of the 10-Q filing content.
                Returns None in the following cases:
                - The stock symbol is blank
                - No filings found for the given stock symbol
                - HTTP errors during content retrieval
                - Other exceptions during processing

                The returned text is preprocessed to:
                - Remove HTML formatting
                - Remove special characters
                - Retain only alphanumeric characters, dollar signs,
                  spaces and newlines
        """
        return self._fetch_latest_filing(stock_name, "10-Q")

    def get_tools(self) -> List[FunctionTool]:
        r"""Returns a list of FunctionTool objects representing the
        functions in the toolkit.

        Returns:
            List[FunctionTool]: A list of FunctionTool objects
                representing the functions in the toolkit.
        """
        return [
            FunctionTool(self.fetch_10k_filing),
            FunctionTool(self.fetch_10q_filing),
        ]
if __name__ == "__main__":
    # Manual smoke test: fetch the latest 10-K and 10-Q for one ticker and
    # print the length of each cleaned text.
    ticker = "GOOG"
    toolkit = SECToolkit()
    data_10k = toolkit.fetch_10k_filing(ticker)
    data_10q = toolkit.fetch_10q_filing(ticker)
    # A failed fetch returns None; report its length as 0.
    len_10k = len(data_10k) if data_10k is not None else 0
    len_10q = len(data_10q) if data_10q is not None else 0
    # The label is derived from ``ticker`` so it always matches the symbol
    # that was actually fetched.
    print(f"fetch_10k_filing {ticker} = {len_10k}")
    print(f"fetch_10q_filing {ticker} = {len_10q}")