BigData-Spielchen

class Database():
  def __init__(self, own_area_code=""):
    """Open the SQLite database named by DATABASENAME.

    If the database file does not exist yet, it is created together with all
    tables and indexes, and the new schema is committed. An already existing
    file is simply opened.

    own_area_code is accepted but not used by this method.
    """
    self.dbname=DATABASENAME

    # Existence has to be determined before connecting; a missing file means
    # that the schema must be created.
    needs_schema=not os.path.exists(self.dbname)
    self.connection=sqlite3.connect(self.dbname)
    if not needs_schema:
      return

    # DDL statements, executed in exactly this order.
    schema=(
      # Nodes -- sample XML attributes:
      #   id="2078835943" lat="54.8068123" lon="9.5158941" version="2"
      #   timestamp="2017-04-23T17:19:57Z" changeset="48066661"
      #   uid="5708153" user="dede67"/>
      'CREATE TABLE Nodes'
      ' (id        INTEGER NOT NULL PRIMARY KEY,'
      '  lat       REAL,'
      '  lon       REAL,'
      '  version   INTEGER,'
      '  timestamp INTEGER,'
      '  changeset INTEGER,'
      '  uid INTEGER)',

      'CREATE INDEX Nodes_uid_idx ON Nodes (uid ASC)',

      # Users -- uid is temporary (to test whether uid in the XML maps
      # uniquely onto user).
      'CREATE TABLE Users'
      ' (id        INTEGER NOT NULL PRIMARY KEY,'
      '  uid       INTEGER,'
      '  user      VARCHAR)',

      "CREATE UNIQUE INDEX nodupe1 ON Users (uid, user)",

      # Tags -- sample: 'k=', 'TMC:cid_58:tabcd_1:Class', ' v=', 'Point'
      'CREATE TABLE Tags'
      ' (id        INTEGER NOT NULL PRIMARY KEY,'
      '  k         VARCHAR,'
      '  v         VARCHAR,'
      '  node_id   INTEGER,'
      '  rel_id    INTEGER,'
      '  way_id    INTEGER)',

      # Relations -- sample:
      #   'id=', '1234567', ' version=', '1', ' timestamp=', '2018-08-20T21:57:19Z',
      #   ' changeset=', '48066661', ' uid=', '5708153', ' user=', 'dede67'
      'CREATE TABLE Relations'
      ' (id        INTEGER NOT NULL PRIMARY KEY,'
      '  version   INTEGER,'
      '  timestamp INTEGER,'
      '  changeset INTEGER,'
      '  uid INTEGER)',

      # Members -- sample: 'type=', 'node', ' ref=', '5844791129', ' role=', 'stop'
      'CREATE TABLE Members'
      ' (id        INTEGER NOT NULL PRIMARY KEY,'
      '  rel_id    INTEGER,'
      '  type      VARCHAR,'
      '  ref       INTEGER,'
      '  role      VARCHAR)',

      # Ways -- sample:
      #   'id=', '123456789', ' version=', '4', ' timestamp=', '2015-03-26T22:16:40Z',
      #   ' changeset=', '48066661', ' uid=', '5708153', ' user=', 'dede67'
      'CREATE TABLE Ways'
      ' (id        INTEGER NOT NULL PRIMARY KEY,'
      '  version   INTEGER,'
      '  timestamp INTEGER,'
      '  changeset INTEGER,'
      '  uid INTEGER)',

      # Nds -- sample: 'ref=', '1794045679'
      'CREATE TABLE Nds'
      ' (id        INTEGER NOT NULL PRIMARY KEY,'
      '  way_id    INTEGER,'
      '  ref       INTEGER)',
    )

    cursor=self.connection.cursor()
    for statement in schema:
      cursor.execute(statement)
    self.connection.commit()

  # ###########################################################
  #
  def __utcStringToTimestamp(self, utc_str):
    """Convert a UTC date/time string into integer Unix epoch seconds.

    utc_str is parsed with parser.parse() (presumably dateutil.parser --
    the import is not visible here), e.g. "2017-04-23T17:19:57Z".

    A string that carries no timezone designator is treated as UTC, as the
    method name promises; without this, datetime.timestamp() would interpret
    the resulting naive datetime as local time.

    Returns the timestamp truncated to whole seconds as an int.
    """
    from datetime import timezone  # local import: only needed in this method

    moment=parser.parse(utc_str)
    if moment.tzinfo is None:
      # Naive result: pin it to UTC instead of letting timestamp() assume local time.
      moment=moment.replace(tzinfo=timezone.utc)
    return int(moment.timestamp())

  # ###########################################################
  #
  def __insertUser(self, uid, user):
    cursor=self.connection.cursor()
    try:
      cursor.execute('INSERT INTO Users (uid, user) VALUES (?, ?)', (uid, user))
    except:
      pass


  # ###########################################################
  #
  def insertNode(self, d):
    """Write one node into the Nodes table and record its author.

    d is a mapping with the keys "id", "lat", "lon", "version", "timestamp",
    "changeset", "uid" and "user". The values are converted with int() /
    float(), the timestamp via __utcStringToTimestamp(), and stored as one
    row in Nodes. Afterwards the (uid, user) pair is handed to
    __insertUser().

    No commit is performed here.
    """
    cursor=self.connection.cursor()
    node_row=(
      int(d["id"]),
      float(d["lat"]),
      float(d["lon"]),
      int(d["version"]),
      self.__utcStringToTimestamp(d["timestamp"]),
      int(d["changeset"]),
      int(d["uid"]),
    )
    cursor.execute(
      'INSERT INTO Nodes (id, lat, lon, version, timestamp, changeset, uid)'
      ' VALUES (?, ?, ?, ?, ?, ?, ?)',
      node_row)
    self.__insertUser(d["uid"], d["user"])

[... u.s.w. ...]

  # ###########################################################
  #
  def commit(self):
    self.connection.commit()
BigData-Spielchen

Inhaltsverzeichnis

Daten beschaffen und vorverarbeiten

Voranalysen

die Daten in eine Datenbank laden

Indizieren der Datenbank

erste Queries

Struktur der Daten