feat(support): Max for docs #26988

Open
Wants to merge 26 commits into master from support-sidebar-max-integration

Commits (26)
55b8ac9
Support sidebar Max AI - not yet integrated
slshults Dec 11, 2024
a386201
Fixed the `Send` button, no longer looks disabled.
slshults Dec 11, 2024
eb2a395
Fixed autoscrolling
slshults Dec 12, 2024
9c62eaa
Created an exception to allow to work on links in Max's responses. A…
slshults Dec 12, 2024
f764a7c
Put a stop to the weird autoscroll upward and
slshults Dec 13, 2024
2204e21
Restoring error handling for 500 errors
slshults Dec 14, 2024
8e3058a
On the support sidebar, hid the docs ToC behind
slshults Dec 14, 2024
4264bbf
Fixed links to the support form, generated by Max, so that they open …
slshults Dec 15, 2024
b8f3401
Starting to integrate Max. He works in the previous commit, broken in…
slshults Dec 16, 2024
cd6e53d
A step in bringing this branch closer to up-to-date with master
slshults Dec 16, 2024
39da5d1
Another step in bringing up to date with
slshults Dec 17, 2024
6e3c9f5
another step in bringing the branch up to date with master. Server c…
slshults Dec 17, 2024
828143d
Max is now integrated and working on my local, steps remain (secrets …
slshults Dec 17, 2024
76d5364
catching up with master
slshults Dec 17, 2024
7e911a9
Simplify settings
Twixes Dec 17, 2024
6df7860
merged Max's requirements, fixed session handling, simplified convers…
slshults Dec 18, 2024
671b350
Merge remote-tracking branch 'origin/master' into support-sidebar-max…
slshults Dec 18, 2024
3273fbc
Raising the feature flag. ⛳️
slshults Dec 18, 2024
0028ae4
catching up merged requirements-dev.txt
slshults Dec 18, 2024
624069c
Merge branch 'master' into support-sidebar-max-integration
Twixes Dec 18, 2024
04c2256
Remove redundant new deps
Twixes Dec 18, 2024
5ae5b1d
Roll back some redundant frontend changes
Twixes Dec 18, 2024
3206da7
Update query snapshots
github-actions[bot] Dec 18, 2024
51a6bcf
Update UI snapshots for `chromium` (1)
github-actions[bot] Dec 18, 2024
7be9b10
Roll back some redundant frontend changes
Twixes Dec 18, 2024
b0273dc
Merge branch 'support-sidebar-max-integration' of https://github.com/…
Twixes Dec 18, 2024
9 changes: 8 additions & 1 deletion .gitignore
@@ -69,11 +69,18 @@ plugin-transpiler/dist
*.log
# pyright config (keep this until we have a standardized one)
pyrightconfig.json

# Max-specific entries
ee/support_sidebar_max/max-venv/
ee/support_sidebar_max/.vscode
ee/support_sidebar_max/.vscode/settings.json
max-test-venv/
ee/support_sidebar_max/.env
# Assistant Evaluation with Deepeval
.deepeval
.deepeval-cache.json
.deepeval_telemtry.txt
.temporal-worker-settings
temp_test_run_data.json
.temp-deepeval-cache.json
.eslintcache
46 changes: 46 additions & 0 deletions ee/management/commands/run_max.py
@@ -0,0 +1,46 @@
from django.core.management.base import BaseCommand
import logging
import sys

from ee.support_sidebar_max.sidebar_max_AI import app

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)


class Command(BaseCommand):
help = "Run Max's chat server"

def add_arguments(self, parser):
parser.add_argument(
"--port",
type=int,
default=3001,
help="Port to run the server on (default: 3001)",
)
parser.add_argument(
"--host",
type=str,
default="0.0.0.0",
help="Host to bind to (default: 0.0.0.0)",
)
parser.add_argument(
"--debug",
action="store_true",
help="Run in debug mode",
)

def handle(self, *args, **options):
port = options["port"]
host = options["host"]
debug = options["debug"]

logger.info("Starting Max's chat server on port %d... 🦔", port)
try:
app.run(host=host, port=port, debug=debug)
except KeyboardInterrupt:
logger.info("\nShutting down Max's chat server... 👋")
sys.exit(0)
except Exception as e:
logger.exception("\n\n🔴 Oops! Something went wrong: %s\n", str(e))
sys.exit(1)
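For local testing, the command should be runnable through Django's usual manage.py entry point, with the flags defined in add_arguments above (the invocation shape is inferred from the argparse setup, not shown elsewhere in this PR):

python manage.py run_max --host 127.0.0.1 --port 3001 --debug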
2 changes: 2 additions & 0 deletions ee/settings.py
@@ -73,3 +73,5 @@
LANGFUSE_PUBLIC_KEY = get_from_env("LANGFUSE_PUBLIC_KEY", "", type_cast=str)
LANGFUSE_SECRET_KEY = get_from_env("LANGFUSE_SECRET_KEY", "", type_cast=str)
LANGFUSE_HOST = get_from_env("LANGFUSE_HOST", "https://us.cloud.langfuse.com", type_cast=str)

ANTHROPIC_API_KEY = get_from_env("ANTHROPIC_API_KEY", "")
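A minimal sketch of how this setting would presumably be consumed by the Anthropic SDK pinned in Max's requirements below; the exact call site isn't part of this diff, so treat the snippet as an assumption:

import anthropic
from django.conf import settings

# Assumed usage: pass the key from Django settings explicitly
# rather than relying on the SDK's environment-variable discovery.
client = anthropic.Anthropic(api_key=settings.ANTHROPIC_API_KEY)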
5 changes: 5 additions & 0 deletions ee/support_sidebar_max/__init__.py
@@ -0,0 +1,5 @@
from posthog.api import projects_router
from .views import MaxViewSet

# Register Max's viewset under the project's router
projects_router.register(r"max", MaxViewSet, "project_max", ["project_id"])
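Because projects_router nests registrations under a project, Max's endpoints should come out project-scoped, roughly along these lines (a sketch inferred from the router registration; the concrete actions live in MaxViewSet, whose views.py is not part of this diff):

# Assumed URL shape generated by the nested project router:
#   /api/projects/<project_id>/max/        -> list/create on MaxViewSet
#   /api/projects/<project_id>/max/<pk>/   -> detail actions, if the viewset defines them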
180 changes: 180 additions & 0 deletions ee/support_sidebar_max/max_search_tool.py
@@ -0,0 +1,180 @@
import requests
from bs4 import BeautifulSoup
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

SITEMAP_URL = "https://posthog.com/sitemap/sitemap-0.xml"

STATUS_PAGE_URL = "https://status.posthog.com"

HOGQL_PRIORITY_URLS = [
"https://posthog.com/docs/hogql",
"https://posthog.com/docs/hogql/aggregations",
"https://posthog.com/docs/hogql/clickhouse-functions",
"https://posthog.com/docs/hogql/expressions",
"https://posthog.com/docs/product-analytics/sql",
]


def is_hogql_query(query):
hogql_keywords = ["hogql", "sql", "query", "aggregate", "function", "expression"]
return any(keyword in query.lower() for keyword in hogql_keywords)


def is_status_query(query):
status_keywords = ["status", "incident", "outage", "downtime", "ingestion", "slow", "lag", "delays"]
return any(keyword in query.lower() for keyword in status_keywords)


def get_relevant_urls(query):
urls = []

try:
response = requests.get(SITEMAP_URL)
response.raise_for_status()
soup = BeautifulSoup(response.content, "xml")
for url in soup.find_all("loc"):
loc = url.text
if "/questions/" not in loc:
urls.append(loc)
if is_hogql_query(query):
urls.extend(HOGQL_PRIORITY_URLS)
urls.append(STATUS_PAGE_URL)
return urls
except requests.RequestException as e:
logger.error(f"Error fetching sitemap: {str(e)}") # noqa: TRY400
return urls


def prioritize_urls(urls, query):
priority_dirs = {
"docs": ["docs", "tutorials"],
"how": ["docs", "tutorials"],
"pricing": ["pricing"],
"jobs": ["careers"],
"history": ["about", "handbook", "blog"],
"teams": ["teams"],
}

query_type = "docs" # default
for key in priority_dirs:
if key in query.lower():
query_type = key
break

def calculate_relevance(url):
query_words = query.lower().split()
url_lower = url.lower()
word_match_score = sum(3 for word in query_words if word in url_lower)  # 3 points per query word found in the URL
url_depth = len(url.strip("/").split("/"))
depth_score = min(url_depth, 5)
priority_score = 5 if any(dir in url for dir in priority_dirs[query_type]) else 0

if is_hogql_query(query) and url in HOGQL_PRIORITY_URLS:
priority_score += 10

if is_status_query(query) and url == STATUS_PAGE_URL:
priority_score += 15

return (word_match_score * 2) + (depth_score * 1.5) + priority_score

return sorted(urls, key=calculate_relevance, reverse=True)
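
# Illustrative walk-through (editor's example, not part of the diff): for the query
# "hogql aggregations", calculate_relevance("https://posthog.com/docs/hogql/aggregations")
# scores as follows:
#   word_match_score = 6    -> both query words appear in the URL, 3 points each
#   depth_score      = 5    -> the URL splits into 6 segments, capped at 5
#   priority_score   = 15   -> 5 for the "docs" directory, plus 10 as a HogQL priority URL
#   total            = 6 * 2 + 5 * 1.5 + 15 = 34.5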


def max_search_tool(query):
relevant_urls = get_relevant_urls(query)
prioritized_urls = prioritize_urls(relevant_urls, query)
results = []
errors = []

max_urls_to_process = 30
max_chars = 10000
relevance_threshold = 0.6
min_results = 5

def has_highly_relevant_results(results, threshold=2):
return len(results) >= threshold and all(
len(result["relevant_passages"]) >= 2 for result in results[:threshold]
)

for url in prioritized_urls[:max_urls_to_process]:
try:
logger.info(f"Searching {url}")
response = requests.get(url, allow_redirects=True, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.content, "html.parser")

for script in soup(["script", "style"]):
script.decompose()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
text = "\n".join(chunk for chunk in chunks if chunk)

paragraphs = text.split("\n\n")
relevant_passages = []
for i, paragraph in enumerate(paragraphs):
relevance_score = sum(word.lower() in paragraph.lower() for word in query.split())
if relevance_score > 0:
relevant_text = paragraph
char_count = len(relevant_text)

for j in range(i + 1, min(i + 5, len(paragraphs))):
if char_count + len(paragraphs[j]) <= max_chars:
relevant_text += "\n\n" + paragraphs[j]
char_count += len(paragraphs[j])
else:
break

heading = "Unknown Section"
for tag in soup.find_all(["h1", "h2", "h3", "h4", "h5", "h6"]):
if tag.string and tag.string in paragraph:
heading = tag.string
break

relevant_passages.append(
{
"text": relevant_text[:10000],
"url": url,
"heading": heading,
"relevance_score": relevance_score,
}
)

if relevant_passages:
relevant_passages.sort(key=lambda x: x["relevance_score"], reverse=True)
result = {
"page_title": soup.title.string if soup.title else "Untitled",
"url": url,
"relevant_passages": relevant_passages[:4],
}
results.append(result)

if len(results) >= min_results and relevant_passages[0]["relevance_score"] > relevance_threshold:
logger.info(f"Found sufficient relevant results so stopping search.")
break

if has_highly_relevant_results(results):
logger.info("Found highly relevant results so stopping search.")
break

except requests.RequestException as e:
error_message = f"Error fetching {url}: {str(e)}"
logger.error(error_message) # noqa: TRY400
errors.append(error_message)

if not results and not errors:
return (
"Well this is odd. My searches aren't finding anything for that. Could you try asking with different words?"
)
elif errors and not results:
return f"Oof. Sorry about this. I ran into errors when trying to search: {'; '.join(errors)}"
else:
return results[:5]
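A quick sketch of calling the search tool directly (illustrative only; in this PR the real caller is Max's chat server):

from ee.support_sidebar_max.max_search_tool import max_search_tool

results = max_search_tool("how do I write HogQL aggregations")
if isinstance(results, str):
    # The tool returns a user-facing string when nothing was found or every fetch failed.
    print(results)
else:
    for page in results:
        # Each hit carries the page title, URL, and up to four relevant passages.
        print(page["page_title"], page["url"], len(page["relevant_passages"]))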
36 changes: 36 additions & 0 deletions ee/support_sidebar_max/requirements.txt
@@ -0,0 +1,36 @@
annotated-types==0.7.0
anthropic==0.40.0
anyio==4.7.0
beautifulsoup4==4.12.3
blinker==1.9.0
certifi==2024.8.30
charset-normalizer==3.4.0
click==8.1.7
distro==1.9.0
filelock==3.16.1
Flask==3.1.0
Flask-Cors==5.0.0
fsspec==2024.10.0
h11==0.14.0
httpcore==1.0.7
httpx==0.28.1
huggingface-hub==0.26.5
idna==3.10
itsdangerous==2.2.0
Jinja2==3.1.4
jiter==0.8.2
lxml==5.3.0
MarkupSafe==3.0.2
packaging==24.2
pydantic==2.10.3
pydantic_core==2.27.1
python-dotenv==1.0.1
PyYAML==6.0.2
requests==2.32.3
sniffio==1.3.1
soupsieve==2.6
tokenizers==0.21.0
tqdm==4.67.1
typing_extensions==4.12.2
urllib3==2.2.3
Werkzeug==3.1.3