-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat (core): add cosineSimilarity helper function (#1939)
- Loading branch information
Showing
10 changed files
with
183 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
--- | ||
'ai': patch | ||
--- | ||
|
||
feat (core): add cosineSimilarity helper function |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
53 changes: 53 additions & 0 deletions
53
content/docs/07-reference/ai-sdk-core/50-cosine-similarity.mdx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
--- | ||
title: cosineSimilarity | ||
description: Calculate the cosine similarity between two vectors (API Reference) | ||
--- | ||
|
||
# `cosineSimilarity()` | ||
|
||
When you want to compare the similarity of embeddings, standard vector similarity metrics | ||
like cosine similarity are often used. | ||
|
||
`cosineSimilarity` calculates the cosine similarity between two vectors. | ||
A value close to 1 indicates that the vectors point in nearly the same direction (very similar), a value close to 0 indicates that they are unrelated, and a value close to -1 indicates that they point in opposite directions. | ||
|
||
```ts | ||
import { openai } from '@ai-sdk/openai'; | ||
import { cosineSimilarity, embedMany } from 'ai'; | ||
|
||
const { embeddings } = await embedMany({ | ||
model: openai.embedding('text-embedding-3-small'), | ||
values: ['sunny day at the beach', 'rainy afternoon in the city'], | ||
}); | ||
|
||
console.log( | ||
`cosine similarity: ${cosineSimilarity(embeddings[0], embeddings[1])}`, | ||
); | ||
``` | ||
|
||
## Import | ||
|
||
<Snippet text={`import { cosineSimilarity } from "ai"`} prompt={false} /> | ||
|
||
## API Signature | ||
|
||
### Parameters | ||
|
||
<PropertiesTable | ||
content={[ | ||
{ | ||
name: 'vector1', | ||
type: 'number[]', | ||
description: `The first vector to compare`, | ||
}, | ||
{ | ||
name: 'vector2', | ||
type: 'number[]', | ||
description: `The second vector to compare`, | ||
}, | ||
]} | ||
/> | ||
|
||
### Returns | ||
|
||
A number between -1 and 1 representing the cosine similarity between the two vectors. An error is thrown if the two vectors do not have the same length. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 0 additions & 20 deletions
20
examples/ai-core/src/complex/semantic-router/cosine-similarity.ts
This file was deleted.
Oops, something went wrong.
9 changes: 7 additions & 2 deletions
9
examples/ai-core/src/complex/semantic-router/semantic-router.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
18 changes: 18 additions & 0 deletions
18
examples/ai-core/src/embed-many/openai-cosine-similarity.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
import { openai } from '@ai-sdk/openai'; | ||
import { cosineSimilarity, embedMany } from 'ai'; | ||
import dotenv from 'dotenv'; | ||
|
||
dotenv.config(); | ||
|
||
/**
 * Embeds two sample sentences with the OpenAI `text-embedding-3-small`
 * embedding model and logs the cosine similarity of the resulting vectors.
 */
async function main() {
  const result = await embedMany({
    model: openai.embedding('text-embedding-3-small'),
    values: ['sunny day at the beach', 'rainy afternoon in the city'],
  });

  // One embedding is returned per input value, in input order.
  const [first, second] = result.embeddings;
  const similarity = cosineSimilarity(first, second);

  console.log(`cosine similarity: ${similarity}`);
}

// Run the example and report any failure on stderr.
main().catch(console.error);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import { cosineSimilarity } from './cosine-similarity'; | ||
|
||
it('should calculate cosine similarity correctly', () => {
  // Expected value was pre-calculated for [1, 2, 3] vs. [4, 5, 6].
  expect(cosineSimilarity([1, 2, 3], [4, 5, 6])).toBeCloseTo(
    0.9746318461970762,
    5,
  );
});

it('should calculate negative cosine similarity correctly', () => {
  // Vectors pointing in exactly opposite directions have a similarity of -1.
  expect(cosineSimilarity([1, 0], [-1, 0])).toBeCloseTo(-1, 5);
});

it('should throw an error when vectors have different lengths', () => {
  const compareMismatchedVectors = () => cosineSimilarity([1, 2, 3], [4, 5]);

  expect(compareMismatchedVectors).toThrowError();
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
/**
 * Calculates the cosine similarity between two vectors. This is a useful metric for
 * comparing the similarity of two vectors such as embeddings.
 *
 * @param vector1 - The first vector.
 * @param vector2 - The second vector.
 *
 * @returns The cosine similarity between vector1 and vector2: the dot product
 * divided by the product of the magnitudes. Returns 0 when either vector has
 * zero magnitude (all zeros or empty), because the similarity is undefined
 * there and the division would otherwise produce `NaN`.
 * @throws {Error} If the vectors do not have the same length.
 */
export function cosineSimilarity(vector1: number[], vector2: number[]): number {
  if (vector1.length !== vector2.length) {
    throw new Error(
      `Vectors must have the same length (vector1: ${vector1.length} elements, vector2: ${vector2.length} elements)`,
    );
  }

  const magnitude1 = magnitude(vector1);
  const magnitude2 = magnitude(vector2);

  // A zero-magnitude vector has no direction. Guard against 0 / 0 = NaN,
  // which would silently break comparisons and sorting in callers.
  if (magnitude1 === 0 || magnitude2 === 0) {
    return 0;
  }

  return dotProduct(vector1, vector2) / (magnitude1 * magnitude2);
}

/**
 * Calculates the dot product of two vectors.
 *
 * Iterates over the indices of vector1, so vector2 is expected to have at
 * least as many elements (cosineSimilarity checks for equal lengths first).
 *
 * @param vector1 - The first vector.
 * @param vector2 - The second vector.
 * @returns The dot product of vector1 and vector2 (0 for empty vectors).
 */
function dotProduct(vector1: number[], vector2: number[]): number {
  return vector1.reduce(
    (accumulator: number, value: number, index: number) =>
      accumulator + value * vector2[index]!,
    0,
  );
}

/**
 * Calculates the magnitude (Euclidean length) of a vector.
 *
 * @param vector - The vector.
 * @returns The square root of the vector's dot product with itself.
 */
function magnitude(vector: number[]): number {
  return Math.sqrt(dotProduct(vector, vector));
}