- This is a small Python program. When run, it generates a text file containing a data analysis of the news website.
- The database used for this news website is PostgreSQL.
- Python version 2.7 is required.
- Follow this guide to install Python on your operating system.
- Install VirtualBox and Vagrant
- Download the Vagrant config file here.
- Run the following commands:
$ vagrant up
$ vagrant ssh
- Download the news website data here.
- Run the following commands:
$ cd /vagrant
$ psql -d news -f newsdata.sql
- The VM and database are now set up. Once the views listed below have been created, you can run the Python file using:
$ python tool.py
- Run the following commands in the PostgreSQL shell (psql) to create the views that the Python code needs in order to run properly:
-- pageviews: number of log rows per request path, leaving out the site
-- root path '/', listed from the highest count down.
create view pageviews as
    select
        log.path,
        count(*) as num
    from log
    where log.path <> '/'
    group by log.path
    order by num desc;
-- author_articles: pairs each author's name with the slug of every article
-- whose `author` column equals that author's id.
-- Written as an explicit inner join so the join condition lives in `on`
-- and cannot be lost among ordinary `where` filters.
create view author_articles as
    select
        authors.name,
        articles.slug
    from authors
    inner join articles
        on articles.author = authors.id;
-- date_status: one row per log entry, reduced to its `time` column cast to
-- a date plus its status value, sorted by the original `time` value.
create view date_status as
    select
        cast(log.time as date) as date,
        log.status
    from log
    order by log.time;
-- date_errors: per-date count of date_status rows whose status is anything
-- other than '200 OK', in ascending date order.
-- A date with no such rows produces no row in this view.
create view date_errors as
    select
        ds.date,
        count(*) as errors
    from date_status as ds
    where ds.status <> '200 OK'
    group by ds.date
    order by ds.date;
-- date_views: per-date count of all date_status rows (every status value
-- included), in ascending date order.
create view date_views as
    select
        ds.date,
        count(*) as views
    from date_status as ds
    group by ds.date
    order by ds.date;
-- date_views_errors: per-date request total alongside the error total.
-- Built from date_views with a left join so that a date with no row in
-- date_errors is still reported, with errors = 0, instead of being dropped
-- as it would be by an inner join.
create view date_views_errors as
    select
        date_views.date,
        date_views.views,
        -- date_errors has no row for an error-free date; report 0, not null
        coalesce(date_errors.errors, 0) as errors
    from date_views
    left join date_errors
        on date_errors.date = date_views.date;