"""Streamlit entry point for Recomsys.

Loads the movies CSV, derives a numeric release-year column (``annee``) from
the movie titles, optionally shows the raw table, and charts the number of
movies per release year.
"""
import os

import numpy as np
import pandas as pd
import streamlit as st

st.title('Welcome in Recomsys')

# Only referenced by code that used to be commented out; kept so the
# module-level name stays available.
DATE_COLUMN = 'date/time'

# Location of the movies CSV. The RECOMSYS_MOVIES_CSV environment variable,
# when set, overrides the original developer-machine default so the app can
# run on other machines without editing this file.
DATA_PATH = os.environ.get(
    'RECOMSYS_MOVIES_CSV',
    '/Users/audreyghilain/VsCode_Workspace/recomsys/data/small/content/movies.csv',
)

# A four-digit year in parentheses at the very end of the title, e.g.
# "Toy Story (1995)". Anchoring on digits at the end avoids capturing other
# four-character parenthesised fragments such as "(Heat)".
YEAR_PATTERN = r'\((\d{4})\)\s*$'


@st.cache_data
def load_data(nrows):
    """Read the first *nrows* rows of the movies CSV and add an ``annee`` column.

    Args:
        nrows: Maximum number of rows to read from ``DATA_PATH``.

    Returns:
        A DataFrame holding the CSV columns plus ``annee``: the year extracted
        from ``title`` as nullable integers (``<NA>`` where the title does not
        end with a parenthesised four-digit year).

    Raises:
        FileNotFoundError: If ``DATA_PATH`` does not exist.
        KeyError: If the CSV has no ``title`` column.
    """
    data = pd.read_csv(DATA_PATH, nrows=nrows)
    # expand=False yields a Series (one capture group) instead of a DataFrame.
    years = data['title'].str.extract(YEAR_PATTERN, expand=False)
    # Nullable Int64 keeps years as integers while still allowing missing values.
    data['annee'] = pd.to_numeric(years, errors='coerce').astype('Int64')
    return data


data_load_state = st.text('Loading data...')
try:
    data = load_data(10000)
except FileNotFoundError:
    # Show a readable message in the page instead of a raw traceback.
    st.error(f'Movies file not found: {DATA_PATH}')
    st.stop()
data_load_state.text("Done! (using st.cache_data)")

if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)

st.subheader('Number of movies by release year')
# Count movies per year; titles without a recognisable year are left out.
movies_per_year = (
    data['annee'].dropna().astype(int).value_counts().sort_index()
)
st.bar_chart(movies_per_year)