Machine Learning – Clustering

# coding: utf-8
"""Set up log parsing and storage for the clustering example.

Defines the regular expression used to split a log line into named fields
(hostname, date, method, request, response code, referer, user agent, ...),
opens a PostgreSQL connection through psycopg2, and starts walking the
directory that holds the log files.

NOTE(review): the pattern looks like it targets HTTP access logs in the
"combined" format -- confirm against the actual log files.
"""
import base64
import hashlib
import re
from os import walk
from urllib.parse import unquote

import psycopg2
from psycopg2 import IntegrityError

# Regular expression for analysing the URL / log line.
# Written as adjacent single-quoted raw strings so the literal double quotes
# that delimit the request, referer and user-agent fields do not terminate
# the Python string; the concatenated pattern text itself is unchanged.
# NOTE(review): a few character classes are looser than they look and are
# left untouched here: `[A-z]` also matches `[ \ ] ^ _` and the backtick,
# `[-|+]` also matches a literal `|`, `[\d\.)]` also matches `)`, and the
# `.` in `1.\d` matches any character.
regex = (
    r'(?P<hostname>[\d\.)]+)\s'
    r'(?P<logname>.*?)\s'
    r'(?P<remote_user>.*?)\s'
    r'\[(?P<date>\d{2}\/[A-z]{3}\/\d{4}):'
    r'(?P<time>\d{2}:\d{2}:\d{2})\s'
    r'(?P<timezone>[-|+]\d{3,4})]\s'
    r'"(?P<method>[A-Z]*)'
    r'(?P<request>.*?)'
    r'(?P<http_version>[A-Z]*\/1.\d)"\s'
    r'(?P<response_code>\d+)\s'
    r'(?P<response_size>.*?)\s'
    r'"(?P<referer>.*?)"\s'
    r'"(?P<user_agent>.*?)"'
)

# Database connection and cursor shared by the rest of the script.
# NOTE(review): host and credentials are hard-coded; consider reading them
# from the environment or a config file.
con = psycopg2.connect(
    host='192.168.1.1',
    database='mllab',
    user='postgres',
    password='postgres',
)
cur = con.cursor()

# Root directory that is walked for log files.
data_source = "/home/ricardomaia/data_source/"
logfiles = []

for (dirpath, dirnames, filenames) in walk(data_source):
    # […] The loop body and the remainder of the script are truncated in
    # this excerpt; the placeholder below only keeps the file parseable.
    ...