1. Milvus的Maven依赖, 配置如下
<dependency><groupId>io.milvus</groupId><artifactId>milvus-sdk-java</artifactId><version>2.3.4</version><exclusions><exclusion><artifactId>log4j-slf4j-impl</artifactId><groupId>org.apache.logging.log4j</groupId></exclusion></exclusions></dependency>
PS: 请注意!引入的 Java SDK 版本必须与你部署的 Milvus 服务版本相对应。Milvus 官网列出了各服务端版本对应的 Java SDK 版本号,两者对应上之后,相应版本的 API 文档和接口才能正常使用。
milvus官方文档:Milvus v2.3.x documentation
然后2.3.4版本的java sdk的milvus还需要引用google 的protobuf包,不然会报错提示找不到此包
此包也要注意对应milvus的版本 这里官网没说明,我自行尝试可用的是3.24.1版本对应milvus的2.3.4版本的, 配置如下:
<dependency><groupId>com.google.protobuf</groupId><artifactId>protobuf-java</artifactId><version>3.24.1</version></dependency>
2. 向量库的配置类 获取向量库服务地址 登录用户密码等
import io.milvus.client.MilvusServiceClient;
import io.milvus.param.ConnectParam;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;@Configuration
public class MilvusConfig {

    /** Milvus server host, injected from the {@code milvus.host} property. */
    @Value("${milvus.host}")
    private String host;

    /** Milvus server port, injected from the {@code milvus.port} property. */
    @Value("${milvus.port}")
    private Integer port;

    /** Login user name, injected from the {@code milvus.username} property. */
    @Value("${milvus.username}")
    private String username;

    /** Login password, injected from the {@code milvus.password} property. */
    @Value("${milvus.password}")
    private String password;

    /**
     * Builds the Milvus client from the injected connection settings and exposes it as a bean.
     *
     * @return a client connected with the configured host, port and credentials
     */
    @Bean
    public MilvusServiceClient milvusServiceClient() {
        ConnectParam connectParam = ConnectParam.newBuilder()
                .withHost(host)
                .withPort(port)
                .withAuthorization(username, password)
                .build();
        return new MilvusServiceClient(connectParam);
    }
}
在 application.yml 配置文件里配置上面用到的连接信息,即 milvus.host、milvus.port、milvus.username、milvus.password 这四项
3. 根据 milvus 2.3.4 java SDK 提供的 API 接口,调测相关使用的接口
如:创建集合,创建索引,加载集合到内存,插入向量数据,查询向量数据并返回结果 删除集合
import java.util.List;/*** milvus向量数据库相关业务接口** @author Jx* @version 2024-3-18*/
public interface IMilvusService {

    /**
     * Checks whether a collection exists.
     *
     * @param collectionName name of the collection to look up
     * @return whether the collection exists
     */
    Boolean hasCollect(String collectionName);

    /**
     * Creates a collection.
     *
     * @param collectionName name of the collection to create
     * @param desc           description stored with the collection
     */
    void create(String collectionName, String desc);

    /**
     * Inserts vectors into a collection.
     *
     * @param name       collection name
     * @param textIds    ids of the texts the vectors belong to
     * @param vectorList vectors to insert
     * @return whether the insert succeeded
     */
    Boolean insert(String name, List<Long> textIds, List<List<Float>> vectorList);

    /**
     * Runs a vector search against a collection.
     *
     * @param name       collection name
     * @param topK       number of results requested per query vector
     * @param vectorList query vectors
     * @return ids found by the search
     */
    List<Long> search(String name, int topK, List<List<Float>> vectorList);

    /**
     * Drops a collection.
     *
     * @param name collection name
     */
    void dropCollect(String name);

    /**
     * Creates the index of a collection.
     *
     * @param name collection name
     */
    void createIndex(String name);

    /**
     * Deletes vectors from a collection by id.
     *
     * @param name     collection name
     * @param indexIds ids of the vectors to delete
     */
    void dropVectors(String name, List<Long> indexIds);
}
实现类
import com.beust.jcommander.internal.Lists;
import com.geb.config.FaceArchive;
import com.geb.service.IMilvusService;
import io.milvus.client.MilvusServiceClient;
import io.milvus.common.clientenum.ConsistencyLevelEnum;
import io.milvus.grpc.DataType;
import io.milvus.grpc.GetLoadStateResponse;
import io.milvus.grpc.MutationResult;
import io.milvus.grpc.SearchResults;
import io.milvus.param.IndexType;
import io.milvus.param.MetricType;
import io.milvus.param.R;
import io.milvus.param.RpcStatus;
import io.milvus.param.collection.*;
import io.milvus.param.dml.DeleteParam;
import io.milvus.param.dml.InsertParam;
import io.milvus.param.dml.SearchParam;
import io.milvus.param.highlevel.collection.ListCollectionsParam;
import io.milvus.param.highlevel.collection.response.ListCollectionsResponse;
import io.milvus.param.highlevel.dml.DeleteIdsParam;
import io.milvus.param.highlevel.dml.response.DeleteResponse;
import io.milvus.param.index.CreateIndexParam;
import io.milvus.response.SearchResultsWrapper;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;import java.util.ArrayList;
import java.util.List;
import java.util.Map;@Slf4j
@Service
public class MilvusServiceImpl implements IMilvusService {@Autowiredprivate MilvusServiceClient milvusServiceClient;final IndexType INDEX_TYPE = IndexType.IVF_FLAT; // IndexTypefinal String INDEX_PARAM = "{\"nlist\":1024}"; // ExtraParam/*** 创建集合的字段* text_id 对应的文本id* vector 向量字段* tag 标签*/private final String TEXTID = "text_id";private final String VECTOR = "vector";private final String TAG = "tag";private final int dimension = 1024;/*** 创建集合 指定集合名称*/@Overridepublic void create(String collectionName, String desc){log.info("Miluvs create collectionName:{}, desc:{}", collectionName, desc);boolean has = hasCollect(collectionName);log.info("Miluvs hasCollect:{}", has);// 不存在此集合才进行创建集合if(!has){// 创建集合 设置索引 加载集合到内存中FieldType fieldType1 = FieldType.newBuilder().withName(TEXTID).withDataType(DataType.Int64).withPrimaryKey(true).withAutoID(false).build();FieldType fieldType2 = FieldType.newBuilder().withName(VECTOR) // 设置向量名称.withDataType(DataType.FloatVector) // 设置向量类型.withDimension(dimension) // 设置向量维度.build();FieldType fieldType3 = FieldType.newBuilder().withName(TAG).withDataType(DataType.Int64).build();CreateCollectionParam createCollectionReq = CreateCollectionParam.newBuilder().withCollectionName(collectionName).withDescription(desc).withShardsNum(2).addFieldType(fieldType1).addFieldType(fieldType2).addFieldType(fieldType3).withEnableDynamicField(true).withConsistencyLevel(ConsistencyLevelEnum.BOUNDED).build();R<RpcStatus> response = milvusServiceClient.createCollection(createCollectionReq);if (response.getStatus() != R.Status.Success.getCode()) {log.info("milvus create fail message:{}", response.getMessage());}else{// 创建集合索引并加载集合到内存 插入数据和搜索的前置操作!!createIndex(collectionName);}}}/*** 创建集合索引 -- 加在向量字段上* @param collectionName*/public void createIndex(String 
collectionName){milvusServiceClient.createIndex(CreateIndexParam.newBuilder().withCollectionName(collectionName).withFieldName(VECTOR).withIndexType(INDEX_TYPE).withMetricType(MetricType.L2).withExtraParam(INDEX_PARAM).withSyncMode(Boolean.FALSE).build());// 加载所创建的集合loadCollection(collectionName);}/*** 加载集合* @param collectionName*/public void loadCollection(String collectionName){milvusServiceClient.loadCollection(LoadCollectionParam.newBuilder().withCollectionName(collectionName).build());// You can check the loading statusGetLoadStateParam param = GetLoadStateParam.newBuilder().withCollectionName(collectionName).build();R<GetLoadStateResponse> stateResponse = milvusServiceClient.getLoadState(param);if (stateResponse.getStatus() != R.Status.Success.getCode()) {System.out.println(stateResponse.getMessage());}}/*** 集合是否存在* @return*/@Overridepublic Boolean hasCollect(String collectionName){R<Boolean> hasResult = milvusServiceClient.hasCollection(HasCollectionParam.newBuilder().withCollectionName(collectionName).build());if (hasResult.getStatus() == R.Status.Success.getCode()) {return hasResult.getData();}return false;}/*** 向量库中插入数据*/@Overridepublic Boolean insert(String name, List<Long> textIds, List<List<Float>> vectorList){log.info("milvus insert name:{}, textIds:{}, vectorList:{}", name, textIds, vectorList);List<Long> tagList = new ArrayList<>();for (Long textId : textIds) {tagList.add(0L);}List<InsertParam.Field> fieldsInsert = new ArrayList<>();fieldsInsert.add(new InsertParam.Field(TEXTID, textIds)); // 文本对应的ids数据listfieldsInsert.add(new InsertParam.Field(VECTOR, vectorList)); // 转换后的向量数据listfieldsInsert.add(new InsertParam.Field(TAG, tagList)); // 标签占位符 给个0InsertParam param = InsertParam.newBuilder().withCollectionName(name).withFields(fieldsInsert).build();R<MutationResult> response = milvusServiceClient.insert(param);if (response.getStatus() != R.Status.Success.getCode()) {log.info("milvus insert vector fail! 
message:{}", response.getMessage());return false;}else{return true;}}/*** 删除集合* @param collectionName*/@Overridepublic void dropCollect(String collectionName){milvusServiceClient.dropCollection(DropCollectionParam.newBuilder().withCollectionName(collectionName).build());}/*** 根据ids删除向量* @param collectionName* @param indexIds*/@Overridepublic void dropVectors(String collectionName, List<Long> indexIds){String expr = TEXTID + " in " + indexIds;DeleteParam param = DeleteParam.newBuilder().withCollectionName(collectionName).withExpr(expr).build();R<MutationResult> response = milvusServiceClient.delete(param);if (response.getStatus() != R.Status.Success.getCode()) {System.out.println(response.getMessage());}}/*** 向量搜索 - 向量库中用具体向量搜索 - 返回indexIds*/@Overridepublic List<Long> search(String collectionName, int topK , List<List<Float>> vectorList){// 构建查询条件 进行向量字段查询 待测试1024维度向量SearchParam searchParam = io.milvus.param.dml.SearchParam.newBuilder().withCollectionName(collectionName).withVectorFieldName(VECTOR).withOutFields(Lists.newArrayList("*")).withVectors(vectorList).withTopK(topK).build();R<SearchResults> searchResults = milvusServiceClient.search(searchParam);if (searchResults.getStatus() != R.Status.Success.getCode()) {log.info(searchResults.getMessage());}List<Long> textIdList = new ArrayList<>() ;SearchResultsWrapper wrapper = new SearchResultsWrapper(searchResults.getData().getResults());for (int i = 0; i < vectorList.size(); ++i) {List<SearchResultsWrapper.IDScore> scores = wrapper.getIDScore(i);for (SearchResultsWrapper.IDScore score:scores) {Map<String, Object> filedsMap = score.getFieldValues();textIdList.add(Long.valueOf(String.valueOf(filedsMap.get(TEXTID))));}}return textIdList;}/*** 删除集合中的 id对应的向量*/public void deleteEmbedingById(){List<String> ids = Lists.newArrayList("441966745769900131","441966745769900133");DeleteIdsParam param = 
DeleteIdsParam.newBuilder().withCollectionName(FaceArchive.COLLECTION_NAME_MILVUS_TESTONE).withPrimaryIds(ids).build();R<DeleteResponse> response = milvusServiceClient.delete(param);if (response.getStatus() != R.Status.Success.getCode()) {System.out.println(response.getMessage());}for (Object deleteId : response.getData().getDeleteIds()) {System.out.println(deleteId);}}// 测试用的向量数据类型public List<List<Float>> getListVector(){List<Float> vectorData = new ArrayList<>();for (int i = 0; i < 1; i++) {vectorData.add((float) Math.random());}List<List<Float>> vectors = new ArrayList<>();vectors.add(vectorData);return vectors;}
}
以上,跟业务进行结合 直接调用操作向量库的API接口即可~
PS:milvus 集成在springboot项目中踩的坑:
#首先就是milvus和protobuf的版本要对应上 可以查下官网api提供的服务端的milvus版本对应的java sdk milvus版本 然后根据milvus sdk版本再找到对应的protobuf版本
#其次 根据官网文档api创建完集合后是无法自动加载集合的 需要手动为集合创建一个索引 比如IVF类型的索引 再进行集合加载到内存 然后才可以对该集合查询插入数据等操作
#插入过程中:所有字段的值都不能为空,且各字段的数据条数必须一致!!
#还有就是,创建集合时候 确定好向量字段的维度,
后面插入向量数据以及查询向量数据的数据维度要与创建向量字段的维度相同!!
注意! milvus向量库只负责向量的操作存储及查询这些,并不负责文本or视频音频转为向量数据的过程,此过程需要专门模型转换进行数据处理为向量数据才可用milvus向量数据库操作!