52 lines
1.5 KiB
Java

package com.pablotj.ia.chat.boot;
import de.kherud.llama.InferenceParameters;
import de.kherud.llama.LlamaModel;
import de.kherud.llama.ModelParameters;
import de.kherud.llama.LlamaOutput;
import jakarta.annotation.PostConstruct;
import org.springframework.stereotype.Service;
/**
 * Spring service that loads a llama model through the {@code de.kherud.llama}
 * bindings and exposes a single-turn chat completion method.
 *
 * <p>The model is created in {@link #init()} (invoked after construction via
 * {@code @PostConstruct}) and released in {@link #close()}.
 *
 * <p>NOTE(review): this class performs no synchronization of its own; whether
 * concurrent {@link #chat(String)} calls are safe depends on {@code LlamaModel}
 * and should be confirmed against the library documentation.
 */
@Service
public class LlamaService implements AutoCloseable {

    /** Path of the model file handed to {@code ModelParameters}. */
    private static final String MODEL_FILE_PATH = "models/openchat-3.5-0106.Q4_K_M.gguf";

    /** System message every prompt is built with. */
    private static final String SYSTEM_PROMPT = "You are a helpful assistant";

    /** Marker stripped from the generated text before it is returned. */
    private static final String END_OF_TURN_MARKER = "<|end_of_turn|>";

    /** Sampling settings applied to every inference request. */
    private static final float TEMPERATURE = 0.7f;
    private static final float TOP_P = 0.9f;
    private static final int TOP_K = 40;

    /** Loaded model; {@code null} before {@link #init()} and after {@link #close()}. */
    private LlamaModel model;

    /**
     * Loads the model from {@link #MODEL_FILE_PATH}.
     *
     * @throws RuntimeException wrapping any exception raised while creating the model
     */
    @PostConstruct
    public void init() {
        try {
            ModelParameters params = new ModelParameters()
                    .setModelFilePath(MODEL_FILE_PATH);
            model = new LlamaModel(params);
        } catch (Exception e) {
            throw new RuntimeException("Error cargando el modelo", e);
        }
    }

    /**
     * Generates a reply to a single user message.
     *
     * <p>The prompt is built from the fixed system message plus {@code prompt},
     * all generated output chunks are concatenated, every occurrence of the
     * end-of-turn marker is removed, and the result is trimmed.
     *
     * @param prompt the user message; must not be {@code null}
     * @return the generated text without end-of-turn markers or surrounding whitespace
     * @throws IllegalArgumentException if {@code prompt} is {@code null}
     * @throws IllegalStateException if the model is not loaded (before {@link #init()}
     *         or after {@link #close()})
     */
    public String chat(String prompt) {
        if (prompt == null) {
            throw new IllegalArgumentException("prompt must not be null");
        }
        // Read the field once so the null check and the generate call see the same reference.
        LlamaModel active = model;
        if (active == null) {
            // Fail with a clear message instead of an NPE or a call into a released model.
            throw new IllegalStateException(
                    "Model is not loaded: init() has not run or close() was already called");
        }

        PromptBuilder chat = new PromptBuilder(SYSTEM_PROMPT);
        // Previous conversation history could be added here before the new message, e.g.:
        // chat.user("Question");
        // chat.assistant("Answer");
        chat.user(prompt);
        String finalPrompt = chat.build();

        InferenceParameters inf = new InferenceParameters(finalPrompt)
                .setTemperature(TEMPERATURE)
                .setTopP(TOP_P)
                .setTopK(TOP_K);

        StringBuilder sb = new StringBuilder();
        for (LlamaOutput out : active.generate(inf)) {
            sb.append(out.text);
        }
        return sb.toString().replace(END_OF_TURN_MARKER, "").trim();
    }

    /**
     * Releases the model if one is loaded. Calling this more than once is a no-op
     * after the first call.
     */
    @Override
    public void close() {
        LlamaModel toClose = model;
        // Clear the field first so a repeated close() never touches the released model
        // and later chat() calls fail fast.
        model = null;
        if (toClose != null) {
            toClose.close();
        }
    }
}