Image, Lenin-like
2018-06-30 12:00:00, by unknown

This is a production notebook. It uses data gathered from a PBX and internal routing and will generate reports for TT (talk time), ACW (after-call work), and so on.
It is meant to be customizable, so one can easily adjust

  • the time range that should be evaluated
  • which of the available plots to produce:
    • hotline data
    • agent data
    • call distribution

The script can also easily iterate time ranges over the whole dataset and produce a plot for each (a sketch of this appears after the plotting function below).

This script makes little sense to run from a CLI; it is meant to run in an IPython environment.

Script code

A markdown dump straight out of the notebook:

%load_ext autoreload
%autoreload 2

## Import necessary modules
import os, sys
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.dates import date2num, AutoDateFormatter, AutoDateLocator, WeekdayLocator, MonthLocator, DayLocator, DateLocator, DateFormatter
from matplotlib.dates import MO, TU, WE, TH, FR, SA, SU
from matplotlib.ticker import AutoMinorLocator, AutoLocator, FormatStrFormatter, ScalarFormatter
import numpy as np
import datetime, calendar
from datetime import timedelta
import matplotlib.patches as mpatches
from itertools import tee
from traitlets import traitlets

sys.path.append(os.path.abspath('/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/entwuerfe/xls_testruns/lib/'))
from ce_funclib import determine_kernzeit as dtkz
from ce_funclib import continuity_check


from ipywidgets import widgets, interact, interactive, fixed, interact_manual, Layout
from IPython.display import display
#%matplotlib inline
%matplotlib tk
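
Both determine_kernzeit (imported above as dtkz) and continuity_check live in the author's private ce_funclib and are not part of the dump. For readers without that library, here is a minimal stand-in for dtkz; the notebook only relies on it mapping a timestamp to 'k' (Kernzeit, core time) or 'n' (Nebenzeit, off-time), and the concrete core-hours window below (weekdays 08:00 to 18:00) is purely an assumption:

def determine_kernzeit(ts):
    # hypothetical stand-in: weekday timestamps between 08:00 and 18:00 count
    # as core time ('k'), everything else as off-time ('n')
    if ts.weekday() < 5 and 8 <= ts.hour < 18:
        return 'k'
    return 'n'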


## Import data from the archive; the source must be a file with agent stats
arcpth='/home/keuch/gits/keuch/code_box/pyt/spreadsheetparsing/test_stats/archiv/'

######## GET A LIST OF MATCHING .xls FILES FROM THE GIVEN DIRECTORY

def collectxlfiles(arcpath):
    xlfilelist=list()

    for xlfile in os.listdir(arcpath):
        if xlfile.startswith('CE_alle_Ag'):
            xlfileabs=os.path.join(arcpath,xlfile)
            xlfilelist.append(xlfileabs)
    return sorted(xlfilelist)

xlfilelist=collectxlfiles(arcpth)
#xlfilelist
#examplefile=xlfilelist[233]
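
As a design note, the directory scan in collectxlfiles could just as well be written with glob; this sketch assumes the same CE_alle_Ag filename prefix:

import glob
# equivalent: collect and sort all matching files from the archive directory
xlfilelist = sorted(glob.glob(os.path.join(arcpth, 'CE_alle_Ag*')))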
###### TEST FOR DATA IN FILE, SORT OUT EMPTY FILES

## good dataframes, by definition, do not contain any zero values
## fill bad DFs with nan?

def filetoframe(exfile):
    exframe=pd.read_excel(exfile) # this is a regular pd.DataFrame
    datecell=exframe.iloc[0,1]
    sheet_datetime=pd.to_datetime(datecell,format='%d.%m %Y : %H')
    sheet_date=sheet_datetime.date()

    integritycheck=exframe.iloc[2,1] # files with data have 'Agenten' here, files with no calls have a NaN

    if integritycheck != 'Agenten':
        # if it's empty, keep date for filling it later
        print('Exception: ', end='')
        except_status='ex'

        usefulcols={0:'tstamp',1:'agent',3:'an',4:'be',22:'vl',24:'ht_float',29:'tt_float'} # map cols to decent names
        exframe=exframe.reindex(columns=sorted(usefulcols.keys()))
        exframe.rename(columns=usefulcols,inplace=True)        
        exframe=exframe[0:1] # strip text rows and the mangled sum row
        print(sheet_datetime)

        exframe['tstamp']=sheet_datetime
        exframe['date']=sheet_date
        exframe['agent']='nocalls_datum'
        exframe[['wd','ww','mm','yy']]=exframe['tstamp'].dt.strftime('%a,%W,%m,%Y').str.split(',',expand=True) # make ww,yy,mm,wd columns
        exframe['bz']=exframe['tstamp'].apply(dtkz)
        exframe['ort']=exframe['agent'].str[0] # split the identifier into useable columns
        exframe['id']='foobar' # split the identifier into useable columns

        # integers should be of appropriate datatype, we received them as strings
        # exframe[['vl','an','be','ww','mm','yy']]=exframe[['vl','an','be','ww','mm','yy']].astype(np.int64) #just for the beauty of it
        exframe.fillna(0, inplace=True) 
        exframe[['ww','mm','yy']]=exframe[['ww','mm','yy']].astype(np.int64) #just for the beauty of it
        #exframe.fillna(0, inplace=True) 
        return exframe,except_status

    else:
        except_status='reg'

        exframe.columns=range(0,30) # rename columns to a temporarily more readable format, fancy rename later
        usefulcols={0:'tstamp',1:'agent',3:'an',4:'be',22:'vl',24:'ht_float',29:'tt_float'} # map cols to decent names
        exframe=exframe[sorted(usefulcols.keys())] # skip cols and keep the ones we need
        exframe.rename(columns=usefulcols,inplace=True) # rename cols
        exframe=exframe[3:-1] # strip text rows and the mangled sum row
        exframe['tstamp']=pd.to_datetime(exframe['tstamp'],format=' %d.%m.%Y %H:%M ')
        exframe['date']=exframe['tstamp'].dt.date
        exframe[['wd','ww','mm','yy']]=exframe['tstamp'].dt.strftime('%a,%W,%m,%Y').str.split(',',expand=True) # make ww,yy,mm,wd columns
        exframe['bz']=exframe['tstamp'].apply(dtkz)

        exframe['ort']=exframe['agent'].str[0] # split the identifier into useable columns
        exframe['id']=exframe['agent'].str[-6:] # split the identifier into useable columns
        exframe['agent']=exframe['agent'].str[2:-7] # split the identifier into useable columns

        # integers should be of appropriate datatype, we received them as strings
        exframe[['vl','an','be','ww','mm','yy']]=exframe[['vl','an','be','ww','mm','yy']].astype(np.int64) #just for the beauty of it

        return exframe,except_status
framelist=list()
exceptionlist=list()
for xfile in xlfilelist:
    frame_from_file,except_status=filetoframe(xfile)
    if except_status=='ex':
        exceptionlist.append(xfile)
    framelist.append(frame_from_file)
Exception: 2017-04-17 00:00:00
Exception: 2017-05-14 00:00:00
Exception: 2017-11-19 00:00:00
Exception: 2017-12-03 00:00:00
Exception: 2017-12-10 00:00:00
Exception: 2018-03-04 00:00:00
Exception: 2018-03-11 00:00:00
Exception: 2018-04-08 00:00:00
#### produce a unified frame with all data and sort it by timestamp and agent name
bigframeii=pd.concat(framelist)
bigframeii.sort_values(['tstamp','agent'],inplace=True)
bigframeii.reset_index(drop=True,inplace=True) # there you go
# the exclusive logins have to be merged
unify_id={'gesinst':'995887','stanzju':'878457','papkeda':'891914'}
bigframeii.loc[bigframeii['id'] == unify_id['gesinst'],'agent'] = 'gesinst'
bigframeii.loc[bigframeii['id'] == unify_id['stanzju'],'agent'] = 'stanzju'
bigframeii.loc[bigframeii['id'] == unify_id['papkeda'],'agent'] = 'papkeda'
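
The three explicit assignments above could be collapsed into a single mapping pass; a compact equivalent using the same unify_id dictionary:

# invert the mapping (id -> canonical agent name) and apply it in one pass
id_to_agent = {agent_id: name for name, agent_id in unify_id.items()}
mask = bigframeii['id'].isin(id_to_agent)
bigframeii.loc[mask, 'agent'] = bigframeii.loc[mask, 'id'].map(id_to_agent)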
#### check whether all data (days) is complete, with no gaps
datenserie_uniq=bigframeii['date'].unique().tolist()
tage_bestand=len(datenserie_uniq)
tage_start=datenserie_uniq[0]
tage_ende=datenserie_uniq[-1]

missing_dates=continuity_check(datenserie_uniq)
if not missing_dates:
    print('no dates are missing')
else:
    print('the following dates are not within the frame:')
    print(missing_dates)
no dates are missing
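
continuity_check also comes from ce_funclib; the notebook only relies on it returning the calendar dates missing from the series. A minimal stand-in under that assumption:

def continuity_check(datelist):
    # hypothetical stand-in: report every calendar date between the first and
    # last entry that is absent from the input list of datetime.date values
    full_range = pd.date_range(datelist[0], datelist[-1], freq='D').date
    return sorted(set(full_range) - set(datelist))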
# get all agents available and create frames for kern and neben
allagents_list=sorted(bigframeii['agent'].unique())
allagents_list.extend(['Hagenow','Berlin','Alle'])
standorte=bigframeii.ort.unique().tolist()

We can't reconstruct individual calls anyway, since the raw data has already been grouped by the hour,
so we can go ahead and group by day to compute averages.

bigframeii.head(2)
#Samstage = bigframeii.loc[bigframeii['tstamp'].dt.dayofweek == 5]  # Saturdays only
   tstamp               agent    an   be   vl   ht_float  tt_float  date        wd   ww  mm  yy    bz  ort  id
0  2017-03-04 08:00:00  beckfra  1.0  1.0  0.0  2.3667    2.1333    2017-03-04  Sat  9   3   2017  n   H    216694
1  2017-03-04 08:00:00  tetzlva  1.0  1.0  0.0  2.6833    2.6167    2017-03-04  Sat  9   3   2017  n   B    613887
Datum_MIN = bigframeii.tstamp.dt.date.min()
Datum_MAX = bigframeii.tstamp.dt.date.max()

Datum_VON = datetime.date(2017,5,1)   # YYYY, MM, DD
Datum_BIS = datetime.date(2018,10,1)

Kernzeit = (bigframeii['bz'] ==  'k')
Nebenzeit = (bigframeii['bz'] ==  'n')

Berlin = (bigframeii['ort'] ==  'B')
Hagenow = (bigframeii['ort'] ==  'H')

Wunschzeitraum = ((bigframeii.tstamp.dt.date >= Datum_VON) & (bigframeii.tstamp.dt.date <= Datum_BIS))
#bigframeii.loc[Wunschzeitraum & Kernzeit & Hagenow]   # SUPER!
kzdata=bigframeii.loc[Wunschzeitraum & Kernzeit].copy()
kzdata_reindex=kzdata.set_index(['tstamp','ort']).copy() # timestamps and site as the new indexes
kzdata_byday=kzdata_reindex.groupby([pd.Grouper(freq='D', level='tstamp'), pd.Grouper(level='ort')]).sum() # grouped by day, all numbers summed
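
The prose above mentions averages, while this cell computes sums; swapping .sum() for .mean() on the same grouper gives the per-day average of the hourly rows, which is one plausible reading (an assumption, this variant is not in the dump):

# same grouping, but averaging the hourly rows instead of summing them
kzdata_daymean=kzdata_reindex.groupby([pd.Grouper(freq='D', level='tstamp'), pd.Grouper(level='ort')]).mean()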
def plotzi(frame):

    aht,aacw,att,nzb,bg ="#003873","#EE0042","#899EB2","#C7798F","#FEFFE2"
    sonntage=WeekdayLocator(byweekday=SU, interval=2)
    datevon=frame.index.get_level_values('tstamp').min().strftime('%d.%m.%y')
    datebis=frame.index.get_level_values('tstamp').max().strftime('%d.%m.%y')


    #### ------------------------------------ ####

    fig=plt.figure(figsize=(12,5))

    ax=fig.add_subplot(111)
    ax.margins(0,0)
    ax.set_facecolor(bg)
    ax.set_xlabel('Kernzeit / ceDis von '+datevon+' bis '+datebis)
    ax.xaxis.set_major_locator(sonntage)
    ax.xaxis.set_major_formatter(DateFormatter('%d.%m.%y'))
    ax.set_ylabel('Calls angenommen')


    ber=frame.xs('B', level=1, drop_level=True)['an']
    hgw=frame.xs('H', level=1, drop_level=True)['an']

    b=ax.bar(ber.index,ber.values, label='calls', color=nzb, width=1)
    h=ax.bar(hgw.index,hgw.values, label='calls', color=aht, width=0.6)

    plt.setp(ax.xaxis.get_majorticklabels(), rotation=45, horizontalalignment='right', size=6 )

    bercalls=int(ber.sum())
    hgwcalls=int(hgw.sum())
    allecalls=bercalls+hgwcalls
    anteilhgw=str(    format((hgwcalls/allecalls)*100,'.2f')   )  
    anteilber=str(    format((bercalls/allecalls)*100,'.2f')   )



    callsBH = mpatches.Patch(color='#000000', label='Calls Ber+Hgw: '+str(allecalls))
    callsB = mpatches.Patch(color=nzb, label='Calls Ber: '+str(bercalls) +' ('+anteilber+'%)'        )
    callsH = mpatches.Patch(color=aht, label='Calls Hgw: '+str(hgwcalls) +' ('+anteilhgw+'%)'        )

    ax.legend(handles=[callsBH,callsB,callsH],fontsize=8,ncol=2,loc='upper right',borderaxespad=-2,framealpha=0.99)

    plt.subplots_adjust(bottom=0.25)
plotzi(kzdata_byday)
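
The per-time-range iteration promised in the intro is not part of the dump; a minimal sketch, assuming monthly chunks of kzdata_byday and the plotzi function above (a month in which one site has no rows would need extra guarding, since plotzi takes an .xs() cross-section per site):

# iterate month-sized chunks over the whole dataset, saving one plot per month
for month, chunk in kzdata_byday.groupby(pd.Grouper(freq='MS', level='tstamp')):
    if chunk.empty:
        continue
    plotzi(chunk)
    plt.savefig('calls_kernzeit_{:%Y_%m}.png'.format(month))
    plt.close()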

Example plots:

  • agent data
  • hotline data
  • site-related distribution