
    h9!                         d dl mZmZmZ d dlZd dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZmZmZ d dlmZmZmZ  G d d	e      Z G d
 de      Z G d d      Zy)    )DictAnyOptionalN)Field)create_enginetext)Engine)AtomicAgentAgentConfigBaseIOSchemaBaseToolBaseToolConfig)ChatHistorySystemPromptGeneratorBaseDynamicContextProviderc                   ^    e Zd ZU dZ edd      Zeed<    edd      Ze	e
eef      ed<   y)	SQLQueryInputSchemazGSchema for user input requiring SQL query generation and summarization..z!User's query in natural language.description
user_inputNzJOptional dictionary of filtering conditions, passed as context to the LLM.additional_filters)__name__
__module____qualname____doc__r   r   str__annotations__r   r   r   r        8/var/www/html/hubwallet-dev/src/core/chatbot_SQLAgent.pyr   r      s<    QC-PQJQ38f4c3h0 r   r   c                   l    e Zd ZU dZ edd      Zeed<    edd      Zeed<    edd      Z	eed	<   y
)SQLQueryOutputSchemaz
    Schema for summarizing the query results.

    Attributes:
      summary: A concise and structured summary of the records as per user requirements.
    .zThe generated SQL query.r   generated_queryz8Preview of retrieved records in pandas DataFrame format.dataframe_previewz+A summarized version of the retrieved data.summaryN)
r   r   r   r   r   r#   r   r   r$   r%   r   r   r    r"   r"      s@     !2LMOSM"34noso*WXGSXr   r"   c                   l    e Zd ZddededefdZdedej                  fdZ	dd	ed
e
eeef      defdZy)SQLQueryAgent	db_enginemodeltemperaturec                     d}t        ddd|gg dddg      }t        ||||	      }t        t        t        f   |
      | _        || _        y)aa  
        Initialize the SQLQueryAgent with schema constraints.

        Args:
            client: The LLM client (e.g., from instructor.from_openai).
            db_engine: SQLAlchemy database engine for connecting to PostgreSQL.
            model: The identifier for the model to use.
            temperature: Sampling temperature for the LLM.
        a_	  
        Your task is to generate optimized SQL queries based on the following database schema:

        Table: feedbacks
        - feedback_id (Integer, PK)
        - store_id (Integer, FK -> stores.store_id)
        - datasource_id (Integer, FK -> datasource.ds_id)
        - branch_id (Integer, FK -> branches.branch_id)
        - customer_name (String)
        - feedback_posting_date (DateTime)
        - feedback_source (String)  # e.g., Google, Yelp, Hubwallet
        - feedback_rating (Integer)  # Rating between 1-5
        - feedback_type (String)  # e.g., text, voice
        - original_content (Text)  # Raw feedback data
        - transcription (Text)  # If voice feedback is provided
        - sentiment (String)  # positive, neutral, negative, mixed
        - confidence_score (Float)
        - emotion (String)  # e.g., frustration, happiness, disappointment
        - arousal (String)  # passive, active
        - created_at (DateTime)

        Table: emotions
        - emotion_id (Integer, PK)
        - store_id (Integer, FK -> stores.store_id)
        - datasource_id (Integer, FK -> datasource.ds_id)
        - branch_id (Integer, FK -> branches.branch_id)
        - feedback_id (Integer, FK -> feedbacks.feedback_id)
        - emotions (String)  # e.g., happiness, sadness, anger
        - created_at (DateTime)

        Table: words
        - word_id (Integer, PK)
        - store_id (Integer, FK -> stores.store_id)
        - datasource_id (Integer, FK -> datasource.ds_id)
        - branch_id (Integer, FK -> branches.branch_id)
        - feedback_id (Integer, FK -> feedbacks.feedback_id)
        - words (String)
        - sentiment (String)  # positive, neutral, negative, mixed
        - created_at (DateTime)

        Table: review_topics
        - rt_id (Integer, PK)
        - store_id (Integer, FK -> stores.store_id)
        - datasource_id (Integer, FK -> datasource.ds_id)
        - branch_id (Integer, FK -> branches.branch_id)
        - feedback_id (Integer, FK -> feedbacks.feedback_id)
        - topic_id (Integer, FK -> topics.topic_id)
        - topic_name (String)  # e.g., service, food, ambiance
        - topic_sentiment (String)  # positive, neutral, negative, mixed
        - created_at (DateTime)

        -- All queries must include a WHERE clause to filter by branch_id.
        -- branch_id should always be included in every SQL query.
        z;You are an expert SQL query generator and database analyst.zDYou understand database schemas and can write optimized SQL queries.z5You help users extract insights from structured data.)z+Receive the user query in natural language.zNAnalyze additional filters provided and include them in the query if relevant.zNGenerate a structured SQL query using the appropriate schema for the database.z?Ensure the query always includes a WHERE clause with branch_id.zJExecute the SQL query on the PostgreSQL database and retrieve the records.z>Convert the retrieved data into a structured pandas DataFrame.zSummarize the extracted records into a concise, structured response for the user. Do not mention branch_id, any SQL queires or any techinal term in the summaryziReturn only a JSON object with the following keys: 'generated_query', 'dataframe_preview', and 'summary'.zEnsure the summary is user-friendly, concise, and includes key insights from the retrieved data. Do not mention branch_id, any SQL queires or any techinal term in the summary)
backgroundstepsoutput_instructions)clientr)   r*   system_prompt_generator)configN)r   r   r
   r   r"   agentr(   )selfr/   r(   r)   r*   database_schemar0   r1   s           r    __init__zSQLQueryAgent.__init__!   sx    5n #8MVG	 | A!!#
, #$;	
 !!45I!IJRXY
"r   queryreturnc                    | j                   j                         5 }|j                  t        |            }|j	                         }|j                         }t        j                  ||      cddd       S # 1 sw Y   yxY w)z
        Execute the generated SQL query on PostgreSQL and return results as a pandas DataFrame.

        Args:
            query: The SQL query to be executed.

        Returns:
            A pandas DataFrame containing the retrieved records.
        )columnsN)r(   connectexecuter   fetchallkeyspd	DataFrame)r3   r6   
connectionresultrecordsr9   s         r    execute_queryzSQLQueryAgent.execute_query   sd     ^^##% 	:''U4Foo'GkkmG<<9		: 	: 	:s   AA66A?Nr   r   c                    t        ||      }| j                  j                  |      }|j                  }| j	                  |      }|j                         j                  d      }d|j                  d       d|j                   d}| j                  j                  t        |            }	t        |||	j                        S )	ah  
        Process user input, generate an SQL query, execute it, retrieve results, and summarize.

        Args:
            user_input: The user's query in natural language.
            additional_filters: Optional dictionary of filters, passed to the LLM.

        Returns:
            An instance of SQLQueryOutputSchema containing only the summary.
        )r   r   F)indexz!Summarize the following dataset:
z  as per user's question: zJ. IMPORTANT: The answer to the user's question is in the provided dataset.)r   )r#   r$   r%   )
r   r2   runr#   rC   head	to_stringr   r"   r%   )
r3   r   r   
input_datarA   r#   dfr$   summary_inputsummary_results
             r    process_queryzSQLQueryAgent.process_query   s     )JSef
 
+ 00 0GGI//e/< =R\\PU\=V<WWqr|  sH  sH  rI  IS  T(;}(UV#+/"**
 	
r   )zgpt-4o-minig?)N)r   r   r   r	   r   floatr5   r>   r?   rC   r   r   r   r"   rM   r   r   r    r'   r'       sb    a#& a# a#[` a#F:3 :2<< : 
 
$sTWx.AY 
ey 
r   r'   )typingr   r   r   pandasr>   pydanticr   
sqlalchemyr   r   sqlalchemy.enginer	   atomic_agentsr
   r   r   r   r   atomic_agents.contextr   r   r   r   r"   r'   r   r   r    <module>rV      sL    & &   * $ [ Z ` `, 	Y< 	YS
 S
r   