import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

class SemanticSearch:
    """Embedding-based search over rows of a spreadsheet.

    Each row of the sheet is turned into a single text entry by joining the
    string form of the selected columns with a space. All entries are encoded
    once at construction time; ``search`` then encodes the query and ranks the
    entries by cosine similarity.

    Attributes:
        df: DataFrame loaded from ``data_path``.
        columns: List of column names combined into each text entry.
        texts: One combined string per row of ``df``.
        embeddings: Result of ``model.encode(texts, normalize_embeddings=True)``.
        model: The encoder passed to the constructor.
    """

    def __init__(self, model, data_path='app/data/sample_data.xlsx', columns=('Cat_id', 'Cat_name')):
        """Load the sheet, build the text entries and embed them.

        Args:
            model: Object exposing ``encode(texts, normalize_embeddings=True)``
                (presumably a sentence-transformers style model; confirm
                against the caller).
            data_path: Path of the Excel file to read.
            columns: Column name, or iterable of column names, whose values
                are joined to form each row's text.

        Raises:
            KeyError: If one of ``columns`` is missing from the sheet
                (raised by pandas on column selection).
        """
        # A bare string would otherwise be iterated character by character.
        if isinstance(columns, str):
            columns = [columns]

        self.df = pd.read_excel(data_path)
        # Keep a private list copy: the default is immutable and callers'
        # sequences are never shared with the instance.
        self.columns = list(columns)

        # Combine specified columns into one text entry per row.
        as_text = self.df[self.columns].astype(str)
        self.texts = [
            ' '.join(row) for row in as_text.itertuples(index=False, name=None)
        ]
        self.embeddings = model.encode(self.texts, normalize_embeddings=True)
        self.model = model

    def search(self, query, top_k=3):
        """Return the entries most similar to ``query``.

        Args:
            query: Text to search for.
            top_k: Maximum number of results. ``None`` returns every entry;
                zero or a negative value returns an empty list.

        Returns:
            A list of ``{"text": str, "score": float}`` dicts ordered from
            highest to lowest similarity score.
        """
        # Nothing to rank, or nothing requested: skip encoding the query.
        # (A negative top_k would otherwise slice off the *last* results.)
        if not self.texts or (top_k is not None and top_k <= 0):
            return []

        query_emb = self.model.encode([query], normalize_embeddings=True)
        # cosine_similarity returns a (1, n_entries) matrix; take its only row.
        scores = cosine_similarity(query_emb, self.embeddings)[0]
        # argsort is ascending, so reverse before taking the best top_k.
        top_k_idx = np.argsort(scores)[::-1][:top_k]
        return [
            {"text": self.texts[i], "score": float(scores[i])}
            for i in top_k_idx
        ]
