cleanUrl: /prophet/docker
cherry: true

Dockerfile

# Start from the lppier/docker-prophet image; no tag is given, so "latest" is pulled.
FROM lppier/docker-prophet
# Upgrade pip and add the packages the forecasting script needs on top of the
# base image (influxdb for the InfluxDB client, workalendar for holiday calendars).
RUN pip install -U pip influxdb tox workalendar
# Arguments given to `docker run` are passed straight to the Python interpreter,
# e.g. the path of the script to execute.
ENTRYPOINT ["python"]

Create docker image

[server ~]$ sudo docker build -t prophet-influxdb-phil:202005081129 .
...
[server ~]$ sudo docker images
REPOSITORY                                TAG                 IMAGE ID            CREATED             SIZE
prophet-influxdb-phil                     202005081129        55794cdad742        7 seconds ago       1.03GB
lppier/docker-prophet                     latest              2a09f62db268        15 months ago       1.01GB

Generate Forecasting data

Prophet

class InfluxDataFrameIO(DataframeIO):
    """Read Prophet input from InfluxDB and write forecast frames back to it.

    The methods rely on ``self.client``, ``self.influxdb_input_measurement``,
    ``self.influxdb_output_measurement`` and ``self.influxdb_output_tags``.
    NOTE(review): these are presumably set by ``DataframeIO`` from the
    constructor keyword arguments -- the base class is not visible here.
    """

    def read(self, days: int = 7):
        """Return the last *days* days of the input measurement as a frame.

        The query takes the last ``portal`` value per one-minute bucket
        (empty buckets are filled with null) and exposes it as column ``y``.

        Args:
            days: Size of the look-back window, in days.

        Returns:
            DataFrame with a timezone-naive ``ds`` column and a ``y`` column;
            the final row of the query result is dropped.
        """
        # Named placeholders: the previous "{:1}" / "{:2}" were format specs
        # (minimum field width), not argument indexes, and padded
        # single-digit day counts with a space inside the query.
        query = (
            """ SELECT last(portal) as y FROM "{measurement}" """
            """WHERE time >= now() - {days}d GROUP BY time(1m) fill(null)"""
        ).format(measurement=self.influxdb_input_measurement, days=days)
        r = self.client.query(query)
        df = pd.DataFrame(r.get(self.influxdb_input_measurement), columns=['y'])
        # Strip the timezone from the time index before it becomes 'ds'.
        df = df.tz_convert(None)
        df.reset_index(inplace=True)
        df.rename(inplace=True, columns={'index': 'ds'})
        # Drop the last row (presumably the still-incomplete current minute
        # bucket -- confirm).
        return df[:-1]

    def write(self, outputDataFrame: pd.DataFrame):
        """Write *outputDataFrame* to the output measurement.

        Points are tagged with ``self.influxdb_output_tags`` and written
        with minute time precision.

        Args:
            outputDataFrame: Frame to store, as produced by ``forecast``.
        """
        self.client.write_points(
            outputDataFrame,
            measurement=self.influxdb_output_measurement,
            tags=self.influxdb_output_tags,
            time_precision='m'
        )
  
def forecast(df: pd.DataFrame):
    """Fit a Prophet model on *df* and return its predictions.

    Args:
        df: Training frame in the shape produced by
            ``InfluxDataFrameIO.read`` (``ds`` and ``y`` columns).

    Returns:
        DataFrame indexed by ``ds`` with the columns ``yhat``,
        ``yhat_lower`` and ``yhat_upper``.
    """
    model = Prophet(
        holidays=get_holidays(),
        holidays_prior_scale=100,
        interval_width=0.99,
        daily_seasonality=10,
        weekly_seasonality=20,
    )
    model.fit(df)

    # Ask for 180 periods at one-minute frequency ('1T' is the pandas
    # offset alias for minutes):
    # <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html>
    horizon = model.make_future_dataframe(periods=180, freq='1T')
    predicted = model.predict(horizon)

    # Keep only the timestamp, the point estimate and its interval bounds.
    wanted_columns = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
    return predicted[wanted_columns].set_index('ds')
  
if __name__ == "__main__":

    # Connection to the InfluxDB instance (credentials are masked here).
    influx_client = DataFrameClient(
        host='influxdb',
        port=8086,
        username='****',
        password='****',
        database='****',
    )

    # Forecast points go to the same measurement as the input data and are
    # marked with a 'forecast' tag.
    forecast_tags = {'forecast': 'sso-prophet-20200508'}
    frame_io = InfluxDataFrameIO(
        client=influx_client,
        influxdb_input_measurement='sso-session',
        influxdb_output_measurement='sso-session',
        influxdb_output_tags=forecast_tags,
    )

    # Build the forecast from 28 days of data ...
    history = frame_io.read(days=28)
    predictions = forecast(history)
    # ... and store the forecasted data into InfluxDB.
    frame_io.write(predictions)

Run

[server ~]$ time sudo docker run -it --rm -v $(pwd):/app prophet-influxdb-phil:202005081129 ssoSessionForecast.py
INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
Initial log joint probability = -460.372
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes
      99       38790.9     0.0447242       3752.01      0.8642      0.8642      111  
...
...
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes
    3720       41037.3   8.66095e-05       361.545   5.573e-07       0.001     4276  LS failed, Hessian reset
    3778       41037.4   2.00873e-06       65.3976      0.8632      0.8632     4344  
Optimization terminated normally:
  Convergence detected: relative gradient magnitude is below tolerance
 
real    5m39.738s
user    0m0.052s
sys     0m0.071s

Exception

National and Company Holidays

https://facebook.github.io/prophet/docs/seasonality,_holiday_effects,_and_regressors.html#modeling-holidays-and-special-events

For instance, if you wanted to include Christmas Eve in addition to Christmas you’d include lower_window=-1,upper_window=0. If you wanted to use Black Friday in addition to Thanksgiving, you’d include lower_window=0,upper_window=1. You can also include a column prior_scale to set the prior scale separately for each holiday, as described below.

To do: use a separate table to hold company holidays

def get_holidays(year: int = 2020):
    """Build the holidays frame handed to Prophet.

    Combines the South Korean calendar from ``workalendar`` for *year* with
    one hard-coded company holiday (2020-05-01).

    Args:
        year: Calendar year whose national holidays are included. Defaults
            to 2020, the value that used to be hard-coded.

    Returns:
        DataFrame with a datetime ``ds`` column and a ``holiday`` label
        column, one row per holiday, with a fresh 0..n-1 index.
    """
    from workalendar.asia import SouthKorea

    national = pd.DataFrame(
        SouthKorea().holidays(year), columns=['ds', 'holiday']
    )
    # workalendar returns plain date objects; convert them so 'ds' has a
    # single datetime dtype once the company rows are appended.
    national['ds'] = pd.to_datetime(national['ds'])

    # 'lower_window' / 'upper_window' columns could be added to either frame
    # to extend a holiday to neighbouring days; they are not needed when only
    # the single day itself should be treated as a holiday.
    company = pd.DataFrame({
        'ds': pd.to_datetime(['2020-05-01']),
        'holiday': 'company',
    })

    # ignore_index avoids duplicate index labels in the combined frame.
    return pd.concat((national, company), ignore_index=True)

Increase weight on holidays