52 lines
1.5 KiB
Java
52 lines
1.5 KiB
Java
package com.pablotj.ia.chat.boot;
|
|
import de.kherud.llama.InferenceParameters;
|
|
import de.kherud.llama.LlamaModel;
|
|
import de.kherud.llama.ModelParameters;
|
|
import de.kherud.llama.LlamaOutput;
|
|
import jakarta.annotation.PostConstruct;
|
|
import org.springframework.stereotype.Service;
|
|
|
|
@Service
|
|
public class LlamaService implements AutoCloseable {
|
|
|
|
private LlamaModel model;
|
|
|
|
@PostConstruct
|
|
public void init() {
|
|
try {
|
|
ModelParameters params = new ModelParameters()
|
|
.setModelFilePath("models/openchat-3.5-0106.Q4_K_M.gguf");
|
|
model = new LlamaModel(params);
|
|
} catch (Exception e) {
|
|
throw new RuntimeException("Error cargando el modelo", e);
|
|
}
|
|
}
|
|
|
|
public String chat(String prompt) {
|
|
PromptBuilder chat = new PromptBuilder("You are a helpful assistant");
|
|
|
|
// Historial previo
|
|
// chat.user("Pregunta");
|
|
// chat.assistant("Respuesta");
|
|
|
|
chat.user(prompt);
|
|
String finalPrompt = chat.build();
|
|
|
|
InferenceParameters inf = new InferenceParameters(finalPrompt)
|
|
.setTemperature(0.7f)
|
|
.setTopP(0.9f)
|
|
.setTopK(40);
|
|
|
|
|
|
StringBuilder sb = new StringBuilder();
|
|
for (LlamaOutput out : model.generate(inf)) {
|
|
sb.append(out.text);
|
|
}
|
|
return sb.toString().replace("<|end_of_turn|>", "").trim();
|
|
}
|
|
|
|
@Override
|
|
public void close() {
|
|
if (model != null) model.close();
|
|
}
|
|
} |