I have a text file containing these line for example
2017-03-14 10:33:56.661 +0800, Number of child: 898
2017-03-14 10:33:56.661 +0800, Volume of water: 86.75
2017-03-14 10:33:56.661 +0800, Size of field: 99.58
2017-03-14 10:34:01.672 +0800, Number of Dog: 779
2017-03-14 10:34:01.672 +0800, Number of child: 898
2017-03-14 10:34:01.672 +0800, Volume of water: 86.75
2017-03-14 10:34:01.672 +0800, Size of field: 99.58
2017-03-14 10:34:06.677 +0800, Number of Dog: 789
....
....
I want to store this data in using mongodb and mongoalchemy. I want it to look like this
{
"_id" : ObjectId("58d8e7e2a0bae63d30e8601a"),
"Timestamp" : ISODate("2017-03-14T10:33:56.661Z"),
"Data": "Number of child"
"Value" : 898,
"Timezone" : "+0800"
}
{
"_id" : ObjectId("58d8e7e2a0bae63d30e8601a"),
"Timestamp" : ISODate("2017-03-14T10:33:56.661Z"),
"Data": "Volume of water"
"Value" : 86.75,
"Timezone" : "+0800"
}
{...
....
....
}
My code so far
model.py
from mongoalchemy.document import Document
from mongoalchemy.fields import *
class the_data(Document):
Timestamp = DateTimeField()
Timezone = StringField()
Data = StringField()
Value = FloatField()
savetodb.py
from mongoalchemy.session import Session
from model import text_data, html_data
import datetime
import os
path = "all_files/"
def save_to_db():
dir_path = os.listdir(path)
session = Session.connect('mockdb') # mongodb's database name
session.clear_collection(the_data)
for file in dir_path:
if file.endswith(".txt"):
f = open(path + file, "r")
for line in f:
word = ["Number of child", "Number of Dog ", "Volume of Water", "Size of field"]
if word[0] in line:
timestamp = line[0:23].strip()
t = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
tzone = [i for i in line.split() if i.startswith("+")]
timezone = ''.join(tzone)
data_child = line.split(": ", 1)[0]
num_child = line.split(": ", 1)[1]
numchild = float(num_child)
data_save = the_data(Timestamp=t,
Timezone=timezone,
Data=data_child,
Value=numchild)
session.save(data_save)
elif word[1] in line:
timestamp = line[0:23].strip()
t = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
tzone = [i for i in line.split() if i.startswith("+")]
timezone = ''.join(tzone)
data_dog = line.split(": ", 1)[0]
num_dog = line.split(": ", 1)[1]
numdog = float(num_child)
data_save = the_data(Timestamp=t,
Timezone=timezone,
Data=data_dog,
Value=numdog)
session.save(data_save)
elif word[2] in line:
timestamp = line[0:23].strip()
t = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
tzone = [i for i in line.split() if i.startswith("+")]
timezone = ''.join(tzone)
data_water = line.split(": ", 1)[0]
vol_water = line.split(": ", 1)[1]
volwater = float(num_child)
data_save = the_data(Timestamp=t,
Timezone=timezone,
Data=data_water,
Value=volwater)
session.save(data_save)
elif word[3] in line:
timestamp = line[0:23].strip()
t = datetime.datetime.strptime(timestamp, "%Y-%m-%d %H:%M:%S.%f")
tzone = [i for i in line.split() if i.startswith("+")]
timezone = ''.join(tzone)
data_field = line.split(": ", 1)[0]
size_field = line.split(": ", 1)[1]
sizefield = float(num_child)
data_save = the_data(Timestamp=t,
Timezone=timezone,
Data=data_field,
Value=savefield)
session.save(data_save)
save_to_db()
I keep the text file under subfolder named all_files
I run above script and here is what I got
Traceback (most recent call last):
File "tasks.py", line 88, in <module>
save_to_db()
File "tasks.py", line 38, in save_to_db
Value=numchild)
File "/Users/Fang/workspace/test_celery3/celery3-env/lib/python2.7/site-packages/mongoalchemy/document.py", line 230, in __init__
raise ExtraValueException(k)
mongoalchemy.exceptions.ExtraValueException: Data
What is the meaning of the error? Did I miss something in the script? While at it, am I going on the right direction to properly store the data in the database?
Additional information : There is even no database mockdb
after I run the script.
Thank you for your help.