Finished subsample code.

This will be put into a nightly cron job to generate 100,000-row and
1,000,000-row subsamples of the main GAMES table.
This commit is contained in:
Gregory Shikhman 2009-05-10 05:24:04 +00:00
parent 7c30fd6479
commit 7b24b82027
2 changed files with 5 additions and 29 deletions

View File

@ -30,42 +30,18 @@ def sample(size):
#look for an existing table with this sample size and drop it if it exists, then create a new one
tblname = configuration.DB_TABLE_PREFIX+TBLSTRING+str(size)
curs.execute("SELECT count(*) FROM information_schema.tables WHERE table_schema = 'corn' AND table_name = '"+tblname+"'")
curs.execute("SELECT count(*) FROM information_schema.tables WHERE table_schema = 'corn' AND table_name = %s",(tblname,))
results = curs.fetchall()
exists = results[0][0] == 1
if exists:
curs.execute("DROP TABLE IF EXISTS "+tblname)
curs.execute("""
CREATE TABLE `"""+tblname+"""` (
`game_id` int(11) NOT NULL auto_increment,
`timestamp` datetime NOT NULL,
`user_id` char(14) NOT NULL,
`serial` char(18) NOT NULL,
`platform` char(8) default NULL,
`version` char(14) default NULL,
`campaign` char(40) default NULL,
`difficulty` char(20) default NULL,
`gold` int(11) default NULL,
`turns` int(11) default NULL,
`scenario` char(40) default NULL,
`start_turn` int(11) default NULL,
`time` int(11) default NULL,
`result` enum('victory','defeat','quit') default NULL,
`end_time` int(11) default NULL,
`end_gold` int(11) default NULL,
`end_turn` int(11) default NULL,
PRIMARY KEY (`game_id`)
) ENGINE=MyISAM AUTO_INCREMENT=2450740 DEFAULT CHARSET=utf8 """)
curs.execute("CREATE TABLE "+tblname+" LIKE GAMES")
#randomly pick size number of entries from the main DB and put them into this sample
choices = random.sample(range(1,max_id),size)
for c in choices:
curs.execute("SELECT * FROM GAMES WHERE `game_id`=%s",c)
results = curs.fetchall()
if len(results) != 0:
#print results[0]
curs.execute("""INSERT INTO %s (game_id,timestamp,user_id,serial,platform,version,
campaign,difficulty,gold,turns,scenario,start_turn,time,result,end_time,end_gold,end_turn) VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",results[0])
curs.execute("INSERT INTO "+tblname+" SELECT * FROM GAMES WHERE `game_id`=%s",c)
conn.close()
sample(100000)
sample(10**6) #1 mil row subsample
sample(10**5) #100k row subsample