
    /i7                         d dl mZ d dlmZ d dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ  G d	 d
      Zy)    )Path)ListN)RandomForestRegressor)ColumnTransformer)OneHotEncoder)Pipeline)datec                   :   e Zd ZdZddefdZdedefdZe	de
j                  de
j                  fd       Ze		 dde
j                  d	ee   d
ede
j                  fd       Zde
j                  dedefdZdede
j                  dededee   de
j                  fdZy)CompanyDemandForecasterz
    One model per company.
    Features:
      - location_id, product_id (categorical)
      - date-based features (dow, month, etc.)
      - time-series features: lags + rolling means of qty_sold
    Target:
      - qty_sold (per day)
    	model_dirc                 ^    t        |      | _        | j                  j                  dd       y )NT)parentsexist_ok)r   r   mkdir)selfr   s     I/var/www/html/hubwallet-dev/src/smart_inventory/core/demand_forecaster.py__init__z CompanyDemandForecaster.__init__   s$    iTD9    
company_idreturnc                 (    | j                   d| dz  S )Ncompany_z_demand.pkl)r   )r   r   s     r   _model_pathz#CompanyDemandForecaster._model_path   s    ~~(:,k BBBr   dfc                    | j                         } t        j                  | d   d      | d<   | d   j                  j	                  d       | d<   | d   j                  j
                  | d<   | d   j                  j                         j                  j                  t              | d<   | d   j                  j                  | d<   | d   j                  j                  | d<   | d   j                  dd	g      j                  t              | d
<   | S )Nr	   T)utcday_of_weekweek_of_yearmonthday_of_year      
is_weekend)copypdto_datetimedttz_localizeweekdayisocalendarweekastypeintr   	dayofyearisin)r   s    r   _add_date_featuresz*CompanyDemandForecaster._add_date_features!   s    WWY ^^BvJD96
Z]]..t46
vJMM11=Z]]668==DDSI>jmm))7vJMM33=m,111a&9@@E<	r   
group_cols
target_colc                    | j                         } | j                  |dgz         } | j                  |      |   }|j                  d      | d<   |j                  d      | d<   |j                  d      | d<   |j                  d      j	                  dd      j                         | d	<   |j                  d      j	                  d
d      j                         | d<   g d}| |   j                  d      | |<   | S )z
        Add lag and rolling features per (location_id, product_id).
        NOTE: df must be sorted by date before calling this.
        r	      lag_1   lag_7   lag_14)windowmin_periodsroll_7_mean   roll_28_mean)r5   r7   r9   r<   r>   g        )r$   sort_valuesgroupbyshiftrollingmeanfillna)r   r1   r2   glag_colss        r   _add_time_series_featuresz1CompanyDemandForecaster._add_time_series_features0   s     WWY^^J&12JJz":. ggaj7ggaj7wwr{8 GGAJ..aQ.GLLN=WWQZ//rq/INNP>
 O(|**3/8	r   c                 "   ||d   |k(     j                         }|j                  rt        d|       | j                  |      }ddg}| j	                  ||d      }g d}d}||   }||   }ddg}	|D 
cg c]	  }
|
|	vs|
 }}
t        dt        d	
      |	fdd|fg      }t        ddd      }t        d|fd|fg      }|j                  ||       | j                  |      }t        j                  ||||d|       t        |      S c c}
w )zo
        df must contain:
        ['company_id', 'location_id', 'product_id', 'date', 'qty_sold', ...]
        r   z&No training data found for company_id=location_id
product_idqty_soldr1   r2   )rI   rJ   r   r   r   r    r#   r5   r7   r9   r<   r>   catignore)handle_unknownnumpassthrough)transformersi,  *   )n_estimatorsrandom_staten_jobspreprocessormodel)steps)pipelinefeature_colsr1   r2   )r$   empty
ValueErrorr0   rG   r   r   r   r   fitr   joblibdumpstr)r   r   r   
df_companyr1   r\   r2   Xycategorical_featurescnumeric_featuresrX   rY   r[   
model_paths                   r   train_company_modelz+CompanyDemandForecaster.train_company_modelQ   sy    <(J67<<>
Ej\RSS ,,Z8
 $\2
33!! 4 


  
|$z" -|<'3U!q@T7TAUU(X>@TU'78
 &
 .% 
 	Q %%j1
$ ,((	 	
 :I Vs   5	D?D
history_dfrI   rJ   future_datesc           
      "   | j                  |      }|j                         st        d| d      t        j                  |      }|d   }|d   }	|d   }
||d   |k(  |d   |k(  z  |d   |k(  z     j                         }|j                  rt        d	| d
| d|       t        j                  ||||t        d      gt        |      z  d      }t        j                  ||gd      }| j                  |      }| j                  ||
d      }|d   j                  t        j                   |            }||   j                         }||	   }|j#                  |      }|D cg c]  }t%        j&                  |       c}|d<   |g d   S c c}w )a%  
        Predict demand for a given (company, location, product) for future_dates.

        history_df must contain at least:
        ['company_id', 'location_id', 'product_id', 'date', 'qty_sold']
        for THAT company/location/product, up to the DAY BEFORE the first future date.
        z$Model file not found for company_id=z. Train it first.r[   r\   r1   r   rI   rJ   zNo history found for company=z, location=z
, product=nan)r   rI   rJ   r	   rK   T)ignore_indexrK   rL   r	   forecast_qty)r   rI   rJ   r	   rp   )r   existsFileNotFoundErrorr`   loadr$   r]   r^   r%   	DataFramefloatlenconcatr0   rG   r/   r&   predictmathceil)r   r   rk   rI   rJ   rl   ri   savedr[   r\   r1   df_hist	df_futuredf_allmask_futuredf_future_featsX_futurepredspreds                      r   predict_for_seriesz*CompanyDemandForecaster.predict_for_series   s     %%j1
  "#6zlBST  J'$^,<(
 %3-(K79,':57
 $&	 	 ==/
| <'=
:,@  LL(*($"5\NS->>	
	 GY/dC ((0 //!! 0 
 Vn))"..*FG -224"<0  *GL*Mt499T?*M'O
 	
 +Ns   #FN)models)rK   )__name__
__module____qualname____doc__rb   r   r-   r   r   staticmethodr%   rt   r0   r   rG   rj   r	   r    r   r   r   r      s   :# :
Cc Cd C r||     %LLI  
	 @Nbll N N NbL
L
 LLL
 	L

 L
 4jL
 
L
r   r   )pathlibr   typingr   r`   pandasr%   ry   sklearn.ensembler   sklearn.composer   sklearn.preprocessingr   sklearn.pipeliner   datetimer	   r   r   r   r   <module>r      s/         2 - / % a
 a
r   