使用 RedisVL 进行查询

在本 notebook 中，我们将探索使用 redisvl 执行更复杂的查询。

在运行此 notebook 之前，请确保：

已安装 redisvl 并激活了该环境，以便在本 notebook 中使用。
有一个运行中的 Redis 实例，且 RediSearch > 2.4 已启动并运行。

import pickle
from jupyterutils import table_print, result_print

# load in the example data and printing utils
# NOTE: pickle can execute arbitrary code while loading -- only unpickle files you trust.
# The context manager guarantees the file handle is closed once the data is read.
with open("hybrid_example_data.pkl", "rb") as f:
    data = pickle.load(f)
table_print(data)

用户	年龄	职业	信用评分	办公地点	用户嵌入	最后更新
john	18	工程师	高	-122.4194,37.7749	b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?'	1741627789
derrick	14	医生	低	-122.4194,37.7749	b'\xcd\xcc\xcc=\xcd\xcc\xcc=\x00\x00\x00?'	1741627789
nancy	94	医生	高	-122.4194,37.7749	b'333?\xcd\xcc\xcc=\x00\x00\x00?'	1710696589
tyler	100	工程师	高	-122.0839,37.3861	b'\xcd\xcc\xcc=\xcd\xcc\xcc>\x00\x00\x00?'	1742232589
tim	12	皮肤科医生	高	-122.0839,37.3861	b'\xcd\xcc\xcc>\xcd\xcc\xcc>\x00\x00\x00?'	1739644189
taimur	15	CEO	低	-122.0839,37.3861	b'\x9a\x99\x19?\xcd\xcc\xcc=\x00\x00\x00?'	1742232589
joe	35	牙医	中等	-122.0839,37.3861	b'fff?fff?\xcd\xcc\xcc='	1742232589

# Index schema for the example user records: index-level settings plus one
# entry per field that should be searchable/filterable.
schema = {
    "index": {
        "name": "user_queries",
        "prefix": "user_queries_docs",
        "storage_type": "hash", # default setting -- HASH
    },
    "fields": [
        # tag fields: filtered with Tag(...) expressions
        {"name": "user", "type": "tag"},
        {"name": "credit_score", "type": "tag"},
        # text field: filtered with Text(...) expressions
        {"name": "job", "type": "text"},
        # numeric fields: "last_updated" holds epoch seconds (see the sample data)
        {"name": "age", "type": "numeric"},
        {"name": "last_updated", "type": "numeric"},
        # geo field: stored as a "longitude,latitude" string in the sample data
        {"name": "office_location", "type": "geo"},
        # vector field searched by VectorQuery / RangeQuery
        {
            "name": "user_embedding",
            "type": "vector",
            "attrs": {
                "dims": 3,
                "distance_metric": "cosine",
                "algorithm": "flat",
                "datatype": "float32"
            }

        }
    ],
}

from redisvl.index import SearchIndex

# construct a search index from the schema
# (host is spelled out explicitly; adjust the URL if Redis is not running on localhost)
index = SearchIndex.from_dict(schema, redis_url="redis://localhost:6379")

# create the index (no data yet); overwrite=True replaces an existing index with the same name
index.create(overwrite=True)

11:40:25 redisvl.index.index INFO   Index already exists, overwriting.

# use the CLI to see the created index
!rvl index listall

# load data to redis
keys = index.load(data)

index.info()['num_docs']

混合查询

混合查询是结合了多种类型过滤器的查询。例如，您可能想要搜索特定年龄、特定职业且距离某个地点在一定范围内的用户。这是一种结合了数值、标签和地理位置过滤器的混合查询。

标签过滤器

标签过滤器应用于标签字段。这些字段未进行分词，用于存储单个分类值。

from redisvl.query import VectorQuery
from redisvl.query.filter import Tag

t = Tag("credit_score") == "high"

v = VectorQuery(
    vector=[0.1, 0.1, 0.5],
    vector_field_name="user_embedding",
    return_fields=["user", "credit_score", "age", "job", "office_location", "last_updated"],
    filter_expression=t
)

results = index.query(v)
result_print(results)

vector_distance	用户	信用评分	年龄	职业	办公地点	最后更新
0	john	高	18	工程师	-122.4194,37.7749	1741627789
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861	1742232589
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861	1739644189
0.266666650772	nancy	高	94	医生	-122.4194,37.7749	1710696589

# negation
t = Tag("credit_score") != "high"

v.set_filter(t)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点	最后更新
0	derrick	低	14	医生	-122.4194,37.7749	1741627789
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861	1742232589
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861	1742232589

# use multiple tags as a list
t = Tag("credit_score") == ["high", "medium"]

v.set_filter(t)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861

# a set of tags works as well -- duplicate values collapse automatically
unique_scores = {"high", "high", "medium"}
t = Tag("credit_score") == unique_scores

v.set_filter(t)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861

如果您想动态生成标签列表怎么办？不用担心。RedisVL 允许您优雅地实现这一点，而无需专门检查空情况。空情况是指您对某个字段运行标签过滤器，却没有提供任何要匹配的值。

Tag("credit_score") == []

像上面那样的空过滤器将生成 Redis 查询过滤器 *，也就是基本情况——不应用任何过滤条件，匹配所有文档。

# gracefully fallback to "*" filter if empty case
empty_case = Tag("credit_score") == []

v.set_filter(empty_case)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861

数值过滤器

数值过滤器应用于数值字段，可用于隔离给定字段的值范围。

from redisvl.query.filter import Num

numeric_filter = Num("age").between(15, 35)

v.set_filter(numeric_filter)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点	最后更新
0	john	高	18	工程师	-122.4194,37.7749	1741627789
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861	1742232589
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861	1742232589

# exact match query
numeric_filter = Num("age") == 14

v.set_filter(numeric_filter)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	derrick	低	14	医生	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749

# negation
numeric_filter = Num("age") != 14

v.set_filter(numeric_filter)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749

时间戳过滤器

在 Redis 中，所有时间都以 epoch 时间数值的形式存储，但 Timestamp 过滤器类允许您直接使用 Python datetime 对象进行过滤，使用起来更方便。

from redisvl.query.filter import Timestamp
from datetime import datetime

# Cutoff for the filter. This is a naive datetime (no tzinfo), so
# dt.timestamp() interprets it in the machine's local timezone; the epoch
# printed in the output below corresponds to a UTC-4 local timezone, and the
# printed value will differ on machines in other timezones.
dt = datetime(2025, 3, 16, 13, 45, 39, 132589)
print(f'Epoch comparison: {dt.timestamp()}')

# keep only records whose "last_updated" is later than the cutoff
timestamp_filter = Timestamp("last_updated") > dt

# swap the filter on the existing vector query and run it again
v.set_filter(timestamp_filter)
result_print(index.query(v))

Epoch comparison: 1742147139.132589

vector_distance	用户	信用评分	年龄	职业	办公地点	最后更新
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861	1742232589
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861	1742232589
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861	1742232589

from redisvl.query.filter import Timestamp
from datetime import datetime

dt = datetime(2025, 3, 16, 13, 45, 39, 132589)

print(f'Epoch comparison: {dt.timestamp()}')

timestamp_filter = Timestamp("last_updated") < dt

v.set_filter(timestamp_filter)
result_print(index.query(v))

Epoch comparison: 1742147139.132589

vector_distance	用户	信用评分	年龄	职业	办公地点	最后更新
0	derrick	低	14	医生	-122.4194,37.7749	1741627789
0	john	高	18	工程师	-122.4194,37.7749	1741627789
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861	1739644189
0.266666650772	nancy	高	94	医生	-122.4194,37.7749	1710696589

from redisvl.query.filter import Timestamp
from datetime import datetime

dt_1 = datetime(2025, 1, 14, 13, 45, 39, 132589)
dt_2 = datetime(2025, 3, 16, 13, 45, 39, 132589)

print(f'Epoch between: {dt_1.timestamp()} - {dt_2.timestamp()}')

timestamp_filter = Timestamp("last_updated").between(dt_1, dt_2)

v.set_filter(timestamp_filter)
result_print(index.query(v))

Epoch between: 1736880339.132589 - 1742147139.132589

vector_distance	用户	信用评分	年龄	职业	办公地点	最后更新
0	derrick	低	14	医生	-122.4194,37.7749	1741627789
0	john	高	18	工程师	-122.4194,37.7749	1741627789
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861	1739644189

文本过滤器

文本过滤器应用于文本字段。这些过滤器应用于整个文本字段。例如，如果您有一个文本字段包含文本 "The quick brown fox jumps over the lazy dog"，则 "quick" 的文本过滤器将匹配此文本字段。

from redisvl.query.filter import Text

# exact match filter -- document must contain the exact word doctor
text_filter = Text("job") == "doctor"

v.set_filter(text_filter)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点	最后更新
0	derrick	低	14	医生	-122.4194,37.7749	1741627789
0.266666650772	nancy	高	94	医生	-122.4194,37.7749	1710696589

# negation -- document must not contain the exact word doctor
negate_text_filter = Text("job") != "doctor"

v.set_filter(negate_text_filter)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861

# wildcard match filter
wildcard_filter = Text("job") % "doct*"

v.set_filter(wildcard_filter)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	derrick	低	14	医生	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749

# fuzzy match filter
fuzzy_match = Text("job") % "%%engine%%"

v.set_filter(fuzzy_match)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861

# conditional -- match documents with job field containing engineer OR doctor
conditional = Text("job") % "engineer|doctor"

v.set_filter(conditional)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749

# gracefully fallback to "*" filter if empty case
empty_case = Text("job") % ""

v.set_filter(empty_case)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861

使用原始查询字符串作为输入。下面我们使用 ~ 标志来指示全文查询是可选的。我们还选择了 BM25 评分器，并在结果中返回文档得分。

# raw Redis query string as the filter; "~" marks the full-text clause as optional
v.set_filter("(~(@job:engineer))")
# use the BM25 scorer and include each document's score in the results.
# NOTE: these calls are applied to the shared query object `v`, so later cells
# that reuse `v` (e.g. the geo filter examples) also show a score column.
v.scorer("BM25").with_scores()

index.query(v)

[{'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS',
  'score': 1.8181817787737895,
  'vector_distance': '0',
  'user': 'john',
  'credit_score': 'high',
  'age': '18',
  'job': 'engineer',
  'office_location': '-122.4194,37.7749'},
 {'id': 'user_queries_docs:01JMJJHE2899024DYPXT6424N9',
  'score': 0.0,
  'vector_distance': '0',
  'user': 'derrick',
  'credit_score': 'low',
  'age': '14',
  'job': 'doctor',
  'office_location': '-122.4194,37.7749'},
 {'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT',
  'score': 1.8181817787737895,
  'vector_distance': '0',
  'user': 'john',
  'credit_score': 'high',
  'age': '18',
  'job': 'engineer',
  'office_location': '-122.4194,37.7749'},
 {'id': 'user_queries_docs:01JMJJPEYD544WB1TKDBJ3Z3J9',
  'score': 0.0,
  'vector_distance': '0',
  'user': 'derrick',
  'credit_score': 'low',
  'age': '14',
  'job': 'doctor',
  'office_location': '-122.4194,37.7749'},
 {'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ',
  'score': 1.8181817787737895,
  'vector_distance': '0.109129190445',
  'user': 'tyler',
  'credit_score': 'high',
  'age': '100',
  'job': 'engineer',
  'office_location': '-122.0839,37.3861'},
 {'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND',
  'score': 1.8181817787737895,
  'vector_distance': '0.109129190445',
  'user': 'tyler',
  'credit_score': 'high',
  'age': '100',
  'job': 'engineer',
  'office_location': '-122.0839,37.3861'},
 {'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V',
  'score': 0.0,
  'vector_distance': '0.158808946609',
  'user': 'tim',
  'credit_score': 'high',
  'age': '12',
  'job': 'dermatologist',
  'office_location': '-122.0839,37.3861'},
 {'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ',
  'score': 0.0,
  'vector_distance': '0.158808946609',
  'user': 'tim',
  'credit_score': 'high',
  'age': '12',
  'job': 'dermatologist',
  'office_location': '-122.0839,37.3861'},
 {'id': 'user_queries_docs:01JMJJHE28NR7KF0EZEA433T2J',
  'score': 0.0,
  'vector_distance': '0.217882037163',
  'user': 'taimur',
  'credit_score': 'low',
  'age': '15',
  'job': 'CEO',
  'office_location': '-122.0839,37.3861'},
 {'id': 'user_queries_docs:01JMJJPEYD9EAVGJ2AZ8K9VX7Q',
  'score': 0.0,
  'vector_distance': '0.217882037163',
  'user': 'taimur',
  'credit_score': 'low',
  'age': '15',
  'job': 'CEO',
  'office_location': '-122.0839,37.3861'}]

地理位置过滤器

地理位置过滤器应用于地理位置字段，用于查找位于给定点一定半径范围内（或范围之外）的结果。半径的距离单位可以是千米、英里、米或英尺。

from redisvl.query.filter import Geo, GeoRadius

# within 10 km of San Francisco office
geo_filter = Geo("office_location") == GeoRadius(-122.4194, 37.7749, 10, "km")

v.set_filter(geo_filter)
result_print(index.query(v))

得分	vector_distance	用户	信用评分	年龄	职业	办公地点
0.4545454446934474	0	john	高	18	工程师	-122.4194,37.7749
0.4545454446934474	0	derrick	低	14	医生	-122.4194,37.7749
0.4545454446934474	0	john	高	18	工程师	-122.4194,37.7749
0.4545454446934474	0	derrick	低	14	医生	-122.4194,37.7749
0.4545454446934474	0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.4545454446934474	0.266666650772	nancy	高	94	医生	-122.4194,37.7749

# within 100 km Radius of San Francisco office
geo_filter = Geo("office_location") == GeoRadius(-122.4194, 37.7749, 100, "km")

v.set_filter(geo_filter)
result_print(index.query(v))

得分	vector_distance	用户	信用评分	年龄	职业	办公地点
0.4545454446934474	0	john	高	18	工程师	-122.4194,37.7749
0.4545454446934474	0	derrick	低	14	医生	-122.4194,37.7749
0.4545454446934474	0	john	高	18	工程师	-122.4194,37.7749
0.4545454446934474	0	derrick	低	14	医生	-122.4194,37.7749
0.4545454446934474	0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.4545454446934474	0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.4545454446934474	0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.4545454446934474	0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.4545454446934474	0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.4545454446934474	0.217882037163	taimur	低	15	CEO	-122.0839,37.3861

# not within 10 km Radius of San Francisco office
geo_filter = Geo("office_location") != GeoRadius(-122.4194, 37.7749, 10, "km")

v.set_filter(geo_filter)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861

组合过滤器

在此示例中，我们将结合标签过滤器、数值过滤器和时间戳过滤器。我们将搜索信用评分为“高”、年龄在 18 到 100 岁之间，且最后更新时间晚于 2025 年 3 月 16 日的用户。

交集（“与”）

t = Tag("credit_score") == "high"
low = Num("age") >= 18
high = Num("age") <= 100
ts = Timestamp("last_updated") > datetime(2025, 3, 16, 13, 45, 39, 132589)

combined = t & low & high & ts

v = VectorQuery([0.1, 0.1, 0.5],
                "user_embedding",
                return_fields=["user", "credit_score", "age", "job",  "office_location"],
                filter_expression=combined)


result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861

并集（“或”）

两个查询的并集是指由这两个查询中任意一个返回的所有结果的集合。两个查询的并集使用 | 运算符执行。

low = Num("age") < 18
high = Num("age") > 93

combined = low | high

v.set_filter(combined)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	derrick	低	14	医生	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.266666650772	nancy	高	94	医生	-122.4194,37.7749

动态组合

您可能需要或可能不需要在给定查询中使用过滤器。如上所示，过滤器将接受 None 类型并恢复为通配符过滤器，实际上返回所有结果。

过滤器组合也同样如此，这使得可以在具有不同参数的请求中快速重用过滤器，如下所示。这消除了测试空情况所需的大量“if-then”条件语句。

def make_filter(age=None, credit=None, job=None):
    """Build a combined filter from whichever criteria are supplied.

    Args:
        age: lower bound (exclusive) for the numeric "age" field.
        credit: value to match on the "credit_score" tag field.
        job: pattern to match on the "job" text field.

    Returns:
        The intersection ("&") of the three filter clauses. As the examples
        below show, a criterion left as None does not restrict the results.
    """
    age_clause = Num("age") > age
    credit_clause = Tag("credit_score") == credit
    job_clause = Text("job") % job
    return age_clause & credit_clause & job_clause

# all parameters
combined = make_filter(age=18, credit="high", job="engineer")
v.set_filter(combined)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861

# just age and credit_score
combined = make_filter(age=18, credit="high")
v.set_filter(combined)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749

# just age
combined = make_filter(age=18)
v.set_filter(combined)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.266666650772	nancy	高	94	医生	-122.4194,37.7749
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861
0.653301358223	joe	中等	35	牙医	-122.0839,37.3861

# no filters
combined = make_filter()
v.set_filter(combined)
result_print(index.query(v))

vector_distance	用户	信用评分	年龄	职业	办公地点
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0	john	高	18	工程师	-122.4194,37.7749
0	derrick	低	14	医生	-122.4194,37.7749
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.109129190445	tyler	高	100	工程师	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.158808946609	tim	高	12	皮肤科医生	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861
0.217882037163	taimur	低	15	CEO	-122.0839,37.3861

非向量查询

在某些情况下，您可能不想运行向量查询，而只想使用类似于 SQL 查询的 FilterExpression。FilterQuery 类启用了此功能。它类似于 VectorQuery 类，但仅接受 FilterExpression。

from redisvl.query import FilterQuery

# match every record whose credit_score tag is "low"
has_low_credit = Tag("credit_score") == "low"

# Request only fields that exist in the schema. There is no field named
# "location"; add "office_location" here to also return the geo field.
filter_query = FilterQuery(
    return_fields=["user", "credit_score", "age", "job"],
    filter_expression=has_low_credit
)

results = index.query(filter_query)

result_print(results)

用户	信用评分	年龄	职业
derrick	低	14	医生
taimur	低	15	CEO
derrick	低	14	医生
taimur	低	15	CEO

计数查询

在某些情况下，您可能需要使用 FilterExpression 来执行 CountQuery，该查询仅返回相关集合中实体的数量。它类似于 FilterQuery 类，但不返回底层数据的值。

from redisvl.query import CountQuery

has_low_credit = Tag("credit_score") == "low"

filter_query = CountQuery(filter_expression=has_low_credit)

count = index.query(filter_query)

print(f"{count} records match the filter expression {str(has_low_credit)} for the given index.")

4 records match the filter expression @credit_score:{low} for the given index.

范围查询

范围查询是一种有用的向量搜索方法，其中仅返回向量 distance_threshold 范围内的结果。这使得用户能够在其数据集中找到与查询向量相似的所有记录，其中“相似”由定量值定义。

from redisvl.query import RangeQuery

# Return every record whose vector distance to the query vector is within
# distance_threshold. Only fields that exist in the schema are requested:
# there is no field named "location"; add "office_location" to return the geo field.
range_query = RangeQuery(
    vector=[0.1, 0.1, 0.5],
    vector_field_name="user_embedding",
    return_fields=["user", "credit_score", "age", "job"],
    distance_threshold=0.2
)

# same as the vector query or filter query
results = index.query(range_query)

result_print(results)

vector_distance	用户	信用评分	年龄	职业
0	john	高	18	工程师
0	derrick	低	14	医生
0	john	高	18	工程师
0	derrick	低	14	医生
0.109129190445	tyler	高	100	工程师
0.109129190445	tyler	高	100	工程师
0.158808946609	tim	高	12	皮肤科医生
0.158808946609	tim	高	12	皮肤科医生

如果需要，我们也可以在两次使用之间更改查询对象的距离阈值。此处我们将设置 distance_threshold=0.1。这意味着查询将返回与查询向量的距离在 0.1 以内的所有匹配项。这是一个很小的距离，因此我们预期获得的匹配项会比之前少。

range_query.set_distance_threshold(0.1)

result_print(index.query(range_query))

用户	信用评分	年龄	职业
john	高	18	工程师
derrick	低	14	医生
john	高	18	工程师
derrick	低	14	医生

范围查询也可以像任何其他查询类型一样与过滤器一起使用。以下将结果限制为仅包含 job 为 engineer 的记录，同时也在向量范围（即距离）内。

is_engineer = Text("job") == "engineer"

range_query.set_filter(is_engineer)

result_print(index.query(range_query))

vector_distance	用户	信用评分	年龄	职业
0	john	高	18	工程师
0	john	高	18	工程师

高级查询修饰符

请参阅查询 API 文档中所有可用的修饰符选项：https://redis.ac.cn/docs/latest/integrate/redisvl/api/query

# Sort by a different field and change dialect
v = VectorQuery(
    vector=[0.1, 0.1, 0.5],
    vector_field_name="user_embedding",
    return_fields=["user", "credit_score", "age", "job",  "office_location"],
    num_results=5,
    filter_expression=is_engineer
).sort_by("age", asc=False).dialect(3)

result = index.query(v)
result_print(result)

vector_distance	年龄	用户	信用评分	职业	办公地点
0.109129190445	100	tyler	高	工程师	-122.0839,37.3861
0.109129190445	100	tyler	高	工程师	-122.0839,37.3861
0	18	john	高	工程师	-122.4194,37.7749
0	18	john	高	工程师	-122.4194,37.7749

原始 Redis 查询字符串

有时将这些类转换为其原始 Redis 查询字符串会很有帮助。

# check out the complex query from above
str(v)

'@job:("engineer")=>[KNN 5 @user_embedding $vector AS vector_distance] RETURN 6 user credit_score age job office_location vector_distance SORTBY age DESC DIALECT 3 LIMIT 0 5'

t = Tag("credit_score") == "high"

str(t)

'@credit_score:{high}'

t = Tag("credit_score") == "high"
low = Num("age") >= 18
high = Num("age") <= 100

combined = t & low & high

str(combined)

'((@credit_score:{high} @age:[18 +inf]) @age:[-inf 100])'

RedisVL 的 SearchIndex 类提供了一个 search() 方法，它是对 FT.SEARCH API 的简单封装。您可以向它传入任何有效的 Redis 查询字符串。

# run the raw query string through search() and dump every returned document's attributes
results = index.search(str(t))
for doc in results.docs:
    print(vars(doc))

{'id': 'user_queries_docs:01JMJJHE28G5F943YGWMB1ZX1V', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJHE28ZW4F33ZNRKXRHYCS', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJHE28B5R6T00DH37A7KSJ', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJHE28EX13NEE7BGBM8FH3', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYCQ89ZQW6QR27J72WT', 'payload': None, 'user': 'john', 'age': '18', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '==\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYDAN0M3V7EQEVPS6HX', 'payload': None, 'user': 'nancy', 'age': '94', 'job': 'doctor', 'credit_score': 'high', 'office_location': '-122.4194,37.7749', 'user_embedding': '333?=\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYDPF9S5328WHCQN0ND', 'payload': None, 'user': 'tyler', 'age': '100', 'job': 'engineer', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '=>\x00\x00\x00?'}
{'id': 'user_queries_docs:01JMJJPEYDKA9ARKHRK1D7KPXQ', 'payload': None, 'user': 'tim', 'age': '12', 'job': 'dermatologist', 'credit_score': 'high', 'office_location': '-122.0839,37.3861', 'user_embedding': '>>\x00\x00\x00?'}

# Cleanup
index.delete()

产品

工具

获取 Redis

连接

学习

最新

查看工作原理