Newer
Older
docker-analytics / examples / clickstream_producer.py
Nigel Stanger on 17 May 2019 525 bytes Added examples
#!/usr/local/bin/python

import time
from datetime import datetime, timedelta

# Stream the clickstream TSV file, line by line, to the "clickstream" Kafka
# topic on the broker at kafka:9092, printing a running line count every
# 10000 input lines.

# Timestamp 120 seconds after start-up.
# NOTE(review): nothing below reads `terminate`, so the run is not actually
# time-limited; presumably a cap on run time was intended -- confirm before
# enforcing it.
terminate = datetime.now() + timedelta(seconds=120)
# Seconds to pause after each input line; 0 means no deliberate throttling.
refresh = 0

from kafka import KafkaProducer
producer = KafkaProducer(bootstrap_servers='kafka:9092')

try:
    # Decode with the same codec used to re-encode each record below, rather
    # than whatever the locale default happens to be in the environment.
    with open("2017_01_en_clickstream.tsv", "r", encoding="utf-8") as clicks:
        for count, line in enumerate(clicks, start=1):
            # The first line of the file is never sent (presumably the TSV
            # header row -- confirm against the data file).
            if count > 1:
                producer.send("clickstream", line.encode("utf-8"))
            # The progress counter includes the skipped first line.
            if count % 10000 == 0:
                print(count)
            time.sleep(refresh)
    # send() only enqueues records; block until everything buffered has
    # actually been delivered before the script exits.
    producer.flush()
finally:
    # Release the producer's network resources even if the loop fails.
    producer.close()