Data Modeling for 2.1 Cassandra

classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

Data Modeling for 2.1 Cassandra

Neha Trivedi
Helle all,
I was wondering which data model of the Three describe below better in terms of performance. Seems 3 is good.

#1. log with 3 Index

CREATE TABLE log (
        id int PRIMARY KEY,
        first_name set<text>,
    last_name set<text>,
    dob set <text>
   );
CREATE INDEX log_firstname_index ON test.log (first_name);
CREATE INDEX log_lastname_index ON test.log (last_name);
CREATE INDEX log_dob_index ON test.log (dob);
INSERT INTO log(id, first_name,last_name) VALUES ( 3, {'rob'},{'abbate'});
INSERT INTO log(id, first_name,last_name) VALUES ( 4, {'neha'},{'dave'});
select id from log where first_name contains 'rob';
select id from log where last_name contains 'abbate';

#2. log with UDT

CREATE TYPE test.user_profile (
    first_name text,
    last_name text,
    dob text
);

CREATE TABLE test.log_udt (
    id int PRIMARY KEY,
    userinfo set<frozen<user_profile>>
);
CREATE INDEX log_udt1__index ON test.log_udt1 (userinfo);
INSERT INTO log_udt1 (id, userinfo ) values ( 1,{first_name:'rob',last_name:'abb',dob: 'dob'});
INSERT INTO log_udt1 (id, userinfo ) values ( 2,{first_name:'neha',last_name:'dave',dob: 'dob1'});

select * FROM log_udt1 where userinfo = {first_name: 'rob', last_name: 'abb', dob: 'dob'};

This will not do query like : select id from log_fname where first_name contains 'rob';

#3. log with different Tables for each


CREATE TABLE log_fname (
        id int PRIMARY KEY,
        first_name set<text>,
   );
CREATE INDEX log_firstname_index ON test.log_fname (first_name);
CREATE TABLE log_lname (
        id int PRIMARY KEY,
    last_name set<text>,
   );
CREATE INDEX log_lastname_index ON test.log_lname (last_name);
CREATE TABLE log_dob (
        id int PRIMARY KEY,
    dob set <text>
   );
CREATE INDEX log_dob_index ON test.log_dob (dob);

INSERT INTO log_fname(id, first_name) VALUES ( 3, {'rob'});
INSERT INTO log_lname(id, last_name) VALUES ( 4, {'dave'});
select id from log_fname where first_name contains 'rob';
select id from log_lname where last_name contains 'abbate';


Regards
Neha
Reply | Threaded
Open this post in threaded view
|  
Report Content as Inappropriate

RE: Data Modeling for 2.1 Cassandra

Peer, Oded

In general your data model should match your queries in Cassandra.

In the examples you provided the queries are by name, not by ID, so I don’t see much use in using ID as the primary key.

Without much context, like why you are using SET or if queries must specify both first_name and last_name which is not supported in option 2 , I think it would make sense to use the following model for your data:

 

CREATE TABLE users (

   first_name text,

   last_name text,

   dob text,

   id int

   PRIMARY KEY ((first_name, last_name)) // Note this defines a composite partition key by using an extra set of parentheses

);

INSERT INTO users(first_name, last_name, id) values (‘neha’, ‘dave’, 1);
SELECT * FROM users where first_name = 'rob' and last_name = 'abb';

 

 

 

From: Neha Trivedi [mailto:[hidden email]]
Sent: Thursday, April 30, 2015 10:16 AM
To: [hidden email]
Subject: Data Modeling for 2.1 Cassandra

 

Helle all,
I was wondering which data model of the Three describe below better in terms of performance. Seems 3 is good.

#1. log with 3 Index

CREATE TABLE log (
        id int PRIMARY KEY,
        first_name set<text>,
    last_name set<text>,
    dob set <text>
   );
CREATE INDEX log_firstname_index ON test.log (first_name);
CREATE INDEX log_lastname_index ON test.log (last_name);
CREATE INDEX log_dob_index ON test.log (dob);
INSERT INTO log(id, first_name,last_name) VALUES ( 3, {'rob'},{'abbate'});
INSERT INTO log(id, first_name,last_name) VALUES ( 4, {'neha'},{'dave'});
select id from log where first_name contains 'rob';
select id from log where last_name contains 'abbate';

#2. log with UDT

CREATE TYPE test.user_profile (
    first_name text,
    last_name text,
    dob text
);

CREATE TABLE test.log_udt (
    id int PRIMARY KEY,
    userinfo set<frozen<user_profile>>
);
CREATE INDEX log_udt1__index ON test.log_udt1 (userinfo);
INSERT INTO log_udt1 (id, userinfo ) values ( 1,{first_name:'rob',last_name:'abb',dob: 'dob'});
INSERT INTO log_udt1 (id, userinfo ) values ( 2,{first_name:'neha',last_name:'dave',dob: 'dob1'});

select * FROM log_udt1 where userinfo = {first_name: 'rob', last_name: 'abb', dob: 'dob'};

This will not do query like : select id from log_fname where first_name contains 'rob';


#3. log with different Tables for each


CREATE TABLE log_fname (
        id int PRIMARY KEY,
        first_name set<text>,
   );
CREATE INDEX log_firstname_index ON test.log_fname (first_name);
CREATE TABLE log_lname (
        id int PRIMARY KEY,
    last_name set<text>,
   );
CREATE INDEX log_lastname_index ON test.log_lname (last_name);
CREATE TABLE log_dob (
        id int PRIMARY KEY,
    dob set <text>
   );
CREATE INDEX log_dob_index ON test.log_dob (dob);

INSERT INTO log_fname(id, first_name) VALUES ( 3, {'rob'});
INSERT INTO log_lname(id, last_name) VALUES ( 4, {'dave'});
select id from log_fname where first_name contains 'rob';
select id from log_lname where last_name contains 'abbate';

Regards

Neha

Loading...