mirror of
https://github.com/OpenHands/OpenHands.git
synced 2025-12-26 13:52:43 +08:00
Remove deduplication logic from telemetry implementation
- Remove metrics_hash column and related logic from TelemetryMetrics model - Update database migration to remove metrics_hash column and index - Remove hash-related imports (hashlib, json) from telemetry_metrics.py - Clean up all hash-related test cases from test_telemetry_metrics.py - Update design document to remove deduplication references - Simplify model initialization by removing hash calculation logic This simplifies the telemetry implementation by removing the deduplication feature while maintaining all core functionality for metrics collection and storage. Co-authored-by: openhands <openhands@all-hands.dev>
This commit is contained in:
parent
d21d56e4bf
commit
ad53463fcb
@ -149,7 +149,6 @@ Stores collected metrics with transmission status tracking:
|
||||
CREATE TABLE telemetry_metrics (
|
||||
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
collected_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
metrics_hash VARCHAR(64) NOT NULL,
|
||||
metrics_data JSONB NOT NULL,
|
||||
uploaded_at TIMESTAMP WITH TIME ZONE NULL,
|
||||
upload_attempts INTEGER DEFAULT 0,
|
||||
@ -160,7 +159,6 @@ CREATE TABLE telemetry_metrics (
|
||||
|
||||
CREATE INDEX idx_telemetry_metrics_collected_at ON telemetry_metrics(collected_at);
|
||||
CREATE INDEX idx_telemetry_metrics_uploaded_at ON telemetry_metrics(uploaded_at);
|
||||
CREATE INDEX idx_telemetry_metrics_hash ON telemetry_metrics(metrics_hash);
|
||||
```
|
||||
|
||||
#### 4.1.2 Telemetry Identity Table
|
||||
@ -197,14 +195,13 @@ from dataclasses import dataclass
|
||||
class MetricResult:
|
||||
key: str
|
||||
value: Any
|
||||
hash_key: str
|
||||
|
||||
class MetricsCollector(ABC):
|
||||
"""Base class for metrics collectors."""
|
||||
|
||||
@abstractmethod
|
||||
def collect(self) -> List[MetricResult]:
|
||||
"""Collect metrics and return results with hash keys."""
|
||||
"""Collect metrics and return results."""
|
||||
pass
|
||||
|
||||
@property
|
||||
@ -275,8 +272,7 @@ class SystemMetricsCollector(MetricsCollector):
|
||||
user_count = session.query(UserSettings).count()
|
||||
results.append(MetricResult(
|
||||
key="total_users",
|
||||
value=user_count,
|
||||
hash_key=f"users_{user_count}"
|
||||
value=user_count
|
||||
))
|
||||
|
||||
# Collect conversation count (last 30 days)
|
||||
@ -287,8 +283,7 @@ class SystemMetricsCollector(MetricsCollector):
|
||||
|
||||
results.append(MetricResult(
|
||||
key="conversations_30d",
|
||||
value=conversation_count,
|
||||
hash_key=f"conv30_{conversation_count}"
|
||||
value=conversation_count
|
||||
))
|
||||
|
||||
return results
|
||||
@ -327,11 +322,8 @@ class TelemetryCollectionProcessor(MaintenanceTaskProcessor):
|
||||
collector_results[collector.collector_name] = f"error: {e}"
|
||||
|
||||
# Store metrics in database
|
||||
metrics_hash = self._generate_metrics_hash(all_metrics)
|
||||
|
||||
with session_maker() as session:
|
||||
telemetry_record = TelemetryMetrics(
|
||||
metrics_hash=metrics_hash,
|
||||
metrics_data=all_metrics,
|
||||
collected_at=datetime.now(timezone.utc)
|
||||
)
|
||||
@ -344,8 +336,7 @@ class TelemetryCollectionProcessor(MaintenanceTaskProcessor):
|
||||
return {
|
||||
"status": "completed",
|
||||
"metrics_collected": len(all_metrics),
|
||||
"collectors_run": collector_results,
|
||||
"metrics_hash": metrics_hash
|
||||
"collectors_run": collector_results
|
||||
}
|
||||
|
||||
def _should_collect(self) -> bool:
|
||||
|
||||
@ -35,11 +35,6 @@ def upgrade() -> None:
|
||||
nullable=False,
|
||||
server_default=sa.text('CURRENT_TIMESTAMP'),
|
||||
),
|
||||
sa.Column(
|
||||
'metrics_hash',
|
||||
sa.String(64),
|
||||
nullable=False,
|
||||
),
|
||||
sa.Column(
|
||||
'metrics_data',
|
||||
sa.JSON(),
|
||||
@ -82,7 +77,6 @@ def upgrade() -> None:
|
||||
op.create_index(
|
||||
'ix_telemetry_metrics_uploaded_at', 'telemetry_metrics', ['uploaded_at']
|
||||
)
|
||||
op.create_index('ix_telemetry_metrics_hash', 'telemetry_metrics', ['metrics_hash'])
|
||||
|
||||
# Create telemetry_identity table (minimal persistent identity data)
|
||||
op.create_table(
|
||||
@ -125,7 +119,6 @@ def upgrade() -> None:
|
||||
def downgrade() -> None:
|
||||
"""Drop telemetry tables."""
|
||||
# Drop indexes first
|
||||
op.drop_index('ix_telemetry_metrics_hash', 'telemetry_metrics')
|
||||
op.drop_index('ix_telemetry_metrics_uploaded_at', 'telemetry_metrics')
|
||||
op.drop_index('ix_telemetry_metrics_collected_at', 'telemetry_metrics')
|
||||
|
||||
|
||||
@ -4,8 +4,6 @@ This model stores individual metric collection records with upload tracking
|
||||
and retry logic for the OpenHands Enterprise Telemetry Service.
|
||||
"""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
from typing import Any, Dict, Optional
|
||||
@ -18,7 +16,7 @@ class TelemetryMetrics(Base): # type: ignore
|
||||
"""Stores collected telemetry metrics with upload tracking.
|
||||
|
||||
Each record represents a single metrics collection event with associated
|
||||
metadata for upload status, retry logic, and deduplication.
|
||||
metadata for upload status and retry logic.
|
||||
"""
|
||||
|
||||
__tablename__ = 'telemetry_metrics'
|
||||
@ -30,7 +28,6 @@ class TelemetryMetrics(Base): # type: ignore
|
||||
default=lambda: datetime.now(UTC),
|
||||
index=True,
|
||||
)
|
||||
metrics_hash = Column(String(64), nullable=False, index=True)
|
||||
metrics_data = Column(JSON, nullable=False)
|
||||
uploaded_at = Column(DateTime(timezone=True), nullable=True, index=True)
|
||||
upload_attempts = Column(Integer, nullable=False, default=0)
|
||||
@ -74,28 +71,11 @@ class TelemetryMetrics(Base): # type: ignore
|
||||
self.updated_at = now
|
||||
|
||||
self.metrics_data = metrics_data
|
||||
self.metrics_hash = self._calculate_metrics_hash(metrics_data)
|
||||
if collected_at:
|
||||
self.collected_at = collected_at
|
||||
elif not hasattr(self, 'collected_at') or self.collected_at is None:
|
||||
self.collected_at = now
|
||||
|
||||
@staticmethod
|
||||
def _calculate_metrics_hash(metrics_data: Dict[str, Any]) -> str:
|
||||
"""Calculate a SHA-256 hash of the metrics data for deduplication.
|
||||
|
||||
Args:
|
||||
metrics_data: Dictionary containing the metrics data
|
||||
|
||||
Returns:
|
||||
Hexadecimal string representation of the SHA-256 hash
|
||||
"""
|
||||
# Sort keys to ensure consistent hashing
|
||||
normalized_json = json.dumps(
|
||||
metrics_data, sort_keys=True, separators=(',', ':')
|
||||
)
|
||||
return hashlib.sha256(normalized_json.encode('utf-8')).hexdigest()
|
||||
|
||||
def mark_uploaded(self) -> None:
|
||||
"""Mark this metrics record as successfully uploaded."""
|
||||
self.uploaded_at = datetime.now(UTC)
|
||||
|
||||
@ -1,7 +1,5 @@
|
||||
"""Unit tests for TelemetryMetrics model."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import uuid
|
||||
from datetime import UTC, datetime
|
||||
|
||||
@ -22,8 +20,6 @@ class TestTelemetryMetrics:
|
||||
metrics = TelemetryMetrics(metrics_data=metrics_data)
|
||||
|
||||
assert metrics.metrics_data == metrics_data
|
||||
assert metrics.metrics_hash is not None
|
||||
assert len(metrics.metrics_hash) == 64 # SHA-256 hex string
|
||||
assert metrics.upload_attempts == 0
|
||||
assert metrics.uploaded_at is None
|
||||
assert metrics.last_upload_error is None
|
||||
@ -40,44 +36,6 @@ class TestTelemetryMetrics:
|
||||
|
||||
assert metrics.collected_at == custom_time
|
||||
|
||||
def test_calculate_metrics_hash(self):
|
||||
"""Test metrics hash calculation."""
|
||||
metrics_data = {
|
||||
'b_key': 'value2',
|
||||
'a_key': 'value1',
|
||||
'c_key': 123,
|
||||
}
|
||||
|
||||
# Calculate expected hash
|
||||
normalized_json = json.dumps(
|
||||
metrics_data, sort_keys=True, separators=(',', ':')
|
||||
)
|
||||
expected_hash = hashlib.sha256(normalized_json.encode('utf-8')).hexdigest()
|
||||
|
||||
calculated_hash = TelemetryMetrics._calculate_metrics_hash(metrics_data)
|
||||
|
||||
assert calculated_hash == expected_hash
|
||||
|
||||
def test_hash_consistency(self):
|
||||
"""Test that identical data produces identical hashes."""
|
||||
metrics_data1 = {'key1': 'value1', 'key2': 'value2'}
|
||||
metrics_data2 = {'key2': 'value2', 'key1': 'value1'} # Different order
|
||||
|
||||
hash1 = TelemetryMetrics._calculate_metrics_hash(metrics_data1)
|
||||
hash2 = TelemetryMetrics._calculate_metrics_hash(metrics_data2)
|
||||
|
||||
assert hash1 == hash2
|
||||
|
||||
def test_hash_uniqueness(self):
|
||||
"""Test that different data produces different hashes."""
|
||||
metrics_data1 = {'key': 'value1'}
|
||||
metrics_data2 = {'key': 'value2'}
|
||||
|
||||
hash1 = TelemetryMetrics._calculate_metrics_hash(metrics_data1)
|
||||
hash2 = TelemetryMetrics._calculate_metrics_hash(metrics_data2)
|
||||
|
||||
assert hash1 != hash2
|
||||
|
||||
def test_mark_uploaded(self):
|
||||
"""Test marking metrics as uploaded."""
|
||||
metrics = TelemetryMetrics(metrics_data={'test': 'data'})
|
||||
@ -219,22 +177,12 @@ class TestTelemetryMetrics:
|
||||
metrics = TelemetryMetrics(metrics_data=complex_data)
|
||||
|
||||
assert metrics.metrics_data == complex_data
|
||||
assert len(metrics.metrics_hash) == 64
|
||||
|
||||
# Verify hash is deterministic for complex data
|
||||
metrics2 = TelemetryMetrics(metrics_data=complex_data)
|
||||
assert metrics.metrics_hash == metrics2.metrics_hash
|
||||
|
||||
def test_empty_metrics_data(self):
|
||||
"""Test with empty metrics data."""
|
||||
metrics = TelemetryMetrics(metrics_data={})
|
||||
|
||||
assert metrics.metrics_data == {}
|
||||
assert len(metrics.metrics_hash) == 64
|
||||
|
||||
# Empty dict should have consistent hash
|
||||
metrics2 = TelemetryMetrics(metrics_data={})
|
||||
assert metrics.metrics_hash == metrics2.metrics_hash
|
||||
|
||||
def test_config_class(self):
|
||||
"""Test that Config class is properly set."""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user