On this site you will find statistical data about the KotakuInAction, GamerGhazi and Gaming subreddits. It was collected around 14:00 UTC on Sunday, 2015-07-19, by looking at the users commenting on the top 10 threads from the last 24 hours (instead of the currently "hot" 10). I believe this gives a bigger and better sample: all three subs have participants from all over the world, and using the "hot" list could bump off popular but slightly older content. In my opinion the day was fairly regular, and none of the threads reached /r/all. I might repeat this at some point.
Thanks to Audioburn, who did a similar analysis of the Coontown sub. I'm using a modified version of his Python scripts, found at the bottom of this page.
Feel free to do whatever you want with the data. I highly recommend downloading it if you want to take a closer look; pretty much every spreadsheet application should be able to read the files.
The karma_ columns show how much submission/comment karma the users in the sample earned per subreddit. The count_ columns show how many submissions or comments they made per subreddit. The participants_ columns show how many users in the sample participated (commented or submitted) at least once, 5 times or 20 times in each subreddit. Row 2 of the participants_subreddits_1 column also shows how many users were in each sample: 509 for KotakuInAction, 156 for GamerGhazi and 1522 for Gaming.
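The participants_ thresholds above are simple tallies over each user's activity counts. Here is a minimal sketch of that logic, using invented per-user activity numbers for illustration (the real script below derives them from each user's full submission and comment history):

```python
# Hypothetical activity counts per user, per subreddit (invented data).
user_activity = {
    'alice': {'gaming': 25, 'KotakuInAction': 3},
    'bob':   {'gaming': 6},
    'carol': {'gaming': 1, 'GamerGhazi': 21},
}

a_subs_1, a_subs_5, a_subs_20 = {}, {}, {}
for user, subs in user_activity.items():
    for sub, count in subs.items():
        # participated at least once
        a_subs_1[sub] = a_subs_1.get(sub, 0) + 1
        # participated at least 5 times
        if count > 4:
            a_subs_5[sub] = a_subs_5.get(sub, 0) + 1
        # participated at least 20 times
        if count > 19:
            a_subs_20[sub] = a_subs_20.get(sub, 0) + 1
```

So with these three made-up users, gaming would get a participants_subreddits_1 value of 3 but a participants_subreddits_20 value of only 1.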
#!/usr/bin/env python2
import sys
import praw
import json

users = []
submissions = []

r = praw.Reddit(user_agent='subreddit_analysis')
subreddit = r.get_subreddit(sys.argv[1])

# get submission objects for the top 10 threads of the last 24 hours
for i, submission in enumerate(subreddit.get_top_from_day(limit=10)):
    print 'getting submission object %s' % (i)
    submissions.append(r.get_submission(submission_id=submission.id))

root_comments = []
for i, s in enumerate(submissions):
    print 'getting comments %s of %s' % (i, len(submissions))
    for c in s.comments:
        root_comments.append(c)

# walk the comment tree and collect unique commenter names
def get_comments(comments, level):
    for i, c in enumerate(comments):
        try:
            print 'getting comment count: %s in level %s' % (i, level)
            if c.author.name not in users:
                users.append(c.author.name)
        except AttributeError:
            # deleted comment, no author
            print 'nada'
        if hasattr(c, 'replies'):
            level += 1
            get_comments(c.replies, level)

get_comments(root_comments, 0)

kb_submissions = {}
cb_submissions = {}
kb_comments = {}
cb_comments = {}
a_subs_1 = {}
a_subs_5 = {}
a_subs_20 = {}

# tally karma, submission/comment counts and participation per subreddit
for idx, username in enumerate(users):
    try:
        print 'getting info for %s, %s of %s' % (username, idx + 1, len(users))
        user = r.get_redditor(username)
        submissions = user.get_submitted(limit=None)
        comments = user.get_comments(limit=None)
        user_subs = {}
        for s in submissions:
            subreddit = s.subreddit.display_name
            kb_submissions[subreddit] = kb_submissions.get(subreddit, 0) + s.score
            cb_submissions[subreddit] = cb_submissions.get(subreddit, 0) + 1
            user_subs[subreddit] = user_subs.get(subreddit, 0) + 1
        for c in comments:
            subreddit = c.subreddit.display_name
            kb_comments[subreddit] = kb_comments.get(subreddit, 0) + c.score
            cb_comments[subreddit] = cb_comments.get(subreddit, 0) + 1
            user_subs[subreddit] = user_subs.get(subreddit, 0) + 1
        for csub in user_subs:
            a_subs_1[csub] = a_subs_1.get(csub, 0) + 1
            if user_subs[csub] > 4:
                a_subs_5[csub] = a_subs_5.get(csub, 0) + 1
            if user_subs[csub] > 19:
                a_subs_20[csub] = a_subs_20.get(csub, 0) + 1
    except Exception as e:
        print(e)
        print 'user deleted his/her account, smart'

mydata = {
    'karma_submissions': kb_submissions,
    'karma_comments': kb_comments,
    'count_submissions': cb_submissions,
    'count_comments': cb_comments,
    'participants_subreddits_1': a_subs_1,
    'participants_subreddits_5': a_subs_5,
    'participants_subreddits_20': a_subs_20,
    'users': users,
}

# save object to disk as json
with open(sys.argv[1] + '.json', 'w') as fp:
    json.dump(mydata, fp)
#!/usr/bin/env python2
import sys
import json
import csv
from collections import OrderedDict

sorted_data = {}
with open(sys.argv[1], 'r') as fp:
    data = json.load(fp)

# sort each sub-dictionary by value; the 'users' list has no items() and is skipped
for key in data.keys():
    if hasattr(data[key], 'items'):
        keydict = OrderedDict(sorted(data[key].items(), key=lambda x: x[1]))
        sorted_data[key] = keydict

# build one "value name" cell per entry, highest values first
table = {}
for col in sorted_data:
    table[col] = []
    for key, val in sorted_data[col].items():
        table[col].append([str(val) + ' ' + key])
    table[col].reverse()

data_writer = csv.writer(open(sys.argv[1].rstrip('json') + 'csv', 'wb'))
col_names = ['karma_submissions', 'karma_comments', 'count_submissions',
             'count_comments', 'participants_subreddits_1',
             'participants_subreddits_5', 'participants_subreddits_20']
data_writer.writerow(col_names)

# the columns have different lengths, so pad the shorter ones with empty cells
go = True
idx = 0
while go:
    row = []
    go = False
    for col in col_names:
        try:
            row.append(table[col][idx][0])
            go = True
        except IndexError:
            row.append('')
    data_writer.writerow(row)
    idx += 1
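Note that the converter packs the value and the subreddit name into a single CSV cell (e.g. "1522 gaming"), which is readable in a spreadsheet but needs splitting if you want the numbers back for your own analysis. A minimal sketch of reading such a column back in Python, using invented rows in the same shape as the generated files:

```python
import csv
import io

# Invented rows for illustration; the real files (e.g. KotakuInAction.csv)
# come out of the converter script above.
sample = io.StringIO(
    "participants_subreddits_1,participants_subreddits_5\n"
    "1522 gaming,200 gaming\n"
    "509 KotakuInAction,\n"
)

counts = {}
for row in csv.DictReader(sample):
    cell = row['participants_subreddits_1']
    if cell:
        # split on the first space only, since subreddit names never contain spaces
        value, sub = cell.split(' ', 1)
        counts[sub] = int(value)
```

After this, counts maps each subreddit name in that column to its participant count as an integer.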