Add prompt caching (Sonnet, Haiku only) (#3411)

* Add prompt caching

* remove anthropic-version from extra_headers

* change supports_prompt_caching method to attribute

* change caching strategy and log cache statistics

* add reminder as a new message to fix caching

* fix unit test

* append reminder to the end of the last message content

* move token logs to post completion function

* fix unit test failure

* fix reminder and prompt caching

* unit tests for prompt caching

* add test

* clean up tests

* separate reminder, use latest two messages

* fix tests

---------

Co-authored-by: tobitege <10787084+tobitege@users.noreply.github.com>
Co-authored-by: Xingyao Wang <xingyao6@illinois.edu>
Co-authored-by: Engel Nyst <enyst@users.noreply.github.com>
This commit is contained in:
Kaushik Deka
2024-08-26 17:46:44 -07:00
committed by GitHub
parent e72dc96d13
commit 5bb931e4d6
4 changed files with 300 additions and 29 deletions

View File

@@ -11,6 +11,7 @@ class ContentType(Enum):
class Content(BaseModel):
type: ContentType
cache_prompt: bool = False
@model_serializer
def serialize_model(self):
@@ -23,7 +24,13 @@ class TextContent(Content):
@model_serializer
def serialize_model(self):
    """Serialize this text content into a plain dict.

    Returns:
        A dict holding 'type' (the value of ``self.type``) and 'text'
        (``self.text``). When ``self.cache_prompt`` is truthy, a
        'cache_control' entry of ``{'type': 'ephemeral'}`` is added as well.
    """
    data: dict[str, str | dict[str, str]] = {
        'type': self.type.value,
        'text': self.text,
    }
    # The marker is attached only on request, so content without caching
    # still serializes to just the 'type' and 'text' keys.
    if self.cache_prompt:
        data['cache_control'] = {'type': 'ephemeral'}
    return data
class ImageContent(Content):
@@ -35,6 +42,8 @@ class ImageContent(Content):
images: list[dict[str, str | dict[str, str]]] = []
for url in self.image_urls:
images.append({'type': self.type.value, 'image_url': {'url': url}})
if self.cache_prompt and images:
images[-1]['cache_control'] = {'type': 'ephemeral'}
return images