Revision: 116798 https://trac.macports.org/changeset/116798 Author: cal@macports.org Date: 2014-02-07 11:46:31 -0800 (Fri, 07 Feb 2014) Log Message: ----------- macports stats: move bin directory from level above, adjust for deployment Added Paths: ----------- branches/gsoc11-statistics/stats-server/bin/ branches/gsoc11-statistics/stats-server/bin/add_ports branches/gsoc11-statistics/stats-server/bin/generate_portspy branches/gsoc11-statistics/stats-server/bin/generate_seed branches/gsoc11-statistics/stats-server/bin/new_ports branches/gsoc11-statistics/stats-server/bin/populate.py Property changes on: branches/gsoc11-statistics/stats-server/bin ___________________________________________________________________ Added: svn:ignore + bundler erubis rackup rails rake rdoc ri sprockets thor tilt tt uuid Added: branches/gsoc11-statistics/stats-server/bin/add_ports =================================================================== --- branches/gsoc11-statistics/stats-server/bin/add_ports (rev 0) +++ branches/gsoc11-statistics/stats-server/bin/add_ports 2014-02-07 19:46:31 UTC (rev 116798) @@ -0,0 +1,78 @@ +#!/usr/bin/env ruby +require 'rubygems' +require 'active_record' +require 'fileutils' +#require 'trac4r' + +ROOT = File.expand_path(File.dirname(__FILE__) + "/../") +TIME_FILE = "#{ROOT}/sync-timestamp" +NEW_PORTS = "#{ROOT}/bin/new_ports" +PORT_INDEX = "/opt/local/var/macports/sources/rsync.macports.org/release/tarballs/ports" +RAILS_ROOT = "#{ROOT}" +MODE = 'production' + +require File.expand_path(RAILS_ROOT + '/app/models/category.rb', __FILE__) +require File.expand_path(RAILS_ROOT + '/app/models/port.rb', __FILE__) + + +if File.exists?(TIME_FILE) + $mtime = File.stat(TIME_FILE).mtime.to_i +else + $mtime = 0 +end + +FileUtils.touch(TIME_FILE) + +$ports = Array.new +$hashed_data = Hash.new + +db_info = YAML.load_file(File.expand_path(RAILS_ROOT + '/config/database.yml', __FILE__)) +#db_info['development']['database'] = RAILS_ROOT + "/" + db_info['development']['database'] +#puts db_info[MODE].to_yaml +ActiveRecord::Base.establish_connection(db_info[MODE]) +#trac = Trac.new(TRAC_URL, TRAC_USER, TRAC_PASS) + +fp = IO.popen("#{NEW_PORTS} -m #{$mtime} #{PORT_INDEX}") +new_ports = fp.read.split("\n") + +if new_ports.count > 0 + new_ports << "" #add last blank line +end + + +new_ports.each do |line| + unless (line == "") + data = line.match(/(\S+):\s+\{?(.+)\}?$/) + unless data.nil? #field missing, should record this if it happens + $hashed_data[data[1].to_sym] = data[2] + end + else + category_name = $hashed_data[:categories].try(:split, " ").try(:[], 0) + $category = Category.find_by_name(category_name) + if $category.nil? + $category = Category.new({:name => category_name}) + $category.save + end + + port = Port.find_by_name($hashed_data[:name]) + if port.nil? + port = Port.new + end + + port[:name] = $hashed_data[:name] + port[:path] = $hashed_data[:portdir] + port[:version] = $hashed_data[:version] + port[:description] = $hashed_data[:description] + port[:licenses] = $hashed_data[:license] + port[:category_id] = $category.id + port[:variants] = $hashed_data[:variants] + port[:maintainers] = $hashed_data[:maintainers] + port[:platforms] = $hashed_data[:platforms] + port[:categories] = $hashed_data[:categories] + + $ports << [$hashed_data, port] + port.save + puts "Saved #{port[:name]}, version #{port[:version]}" + $hashed_data = {} + end +end Property changes on: branches/gsoc11-statistics/stats-server/bin/add_ports ___________________________________________________________________ Added: svn:executable + * Added: branches/gsoc11-statistics/stats-server/bin/generate_portspy =================================================================== --- branches/gsoc11-statistics/stats-server/bin/generate_portspy (rev 0) +++ branches/gsoc11-statistics/stats-server/bin/generate_portspy 2014-02-07 19:46:31 UTC (rev 116798) @@ -0,0 +1,65 @@ +#!/usr/bin/env ruby + +### Generate a valid ports.py for use in populating the database with +### sample submissions. ports.py will be used by populate.py + +# This file adapted from generate_seed which is adapted from add_ports from MPWA + +# Note - this file must be executed from inside RAILS_ROOT +# otherwise it won't be able to connect to the database + +require 'rubygems' +require 'active_record' +require 'fileutils' + +ROOT = File.expand_path(File.dirname(__FILE__) + "/../") +BIN_ROOT = "#{ROOT}/bin" +RAILS_ROOT = "#{ROOT}/stats-server" +require File.expand_path(RAILS_ROOT + '/app/models/port.rb', __FILE__) + +$ports = Array.new +$hashed_data = Hash.new + +db_info = YAML.load_file(File.expand_path(RAILS_ROOT + '/config/database.yml', __FILE__)) +ActiveRecord::Base.establish_connection(db_info['development']) + + +class String + # Escape single quotes + def escape_single_quotes + self.gsub(/'/, "\\\\'") + end +end + +def esc(str) + if not str.nil? + str.escape_single_quotes + else + str + end +end + +path = "#{BIN_ROOT}/ports.py" +portfile = File.new(path, "w") + +if not portfile + puts "Unable to open #{path}" +end + +portfile.syswrite('port_list = [ ') + +# Output an array of dictionaries. Each dictionary represents a port +# The dicts have the keys name, version, variants. + +# Load all ports +ports = Port.all +ports.each do | port | + # Write the port + portfile.syswrite('{') + portfile.syswrite("\'name\': \'#{port.name}\', \'version\': \'#{port.version}\', \'variants\': \'#{port.variants}'") + portfile.syswrite('},') +end + +portfile.syswrite(']') + +portfile.close \ No newline at end of file Property changes on: branches/gsoc11-statistics/stats-server/bin/generate_portspy ___________________________________________________________________ Added: svn:executable + * Added: branches/gsoc11-statistics/stats-server/bin/generate_seed =================================================================== --- branches/gsoc11-statistics/stats-server/bin/generate_seed (rev 0) +++ branches/gsoc11-statistics/stats-server/bin/generate_seed 2014-02-07 19:46:31 UTC (rev 116798) @@ -0,0 +1,113 @@ +#!/usr/bin/env ruby + +### Generate a valid seeds.rb for use in seeding the database with +### valid ports and categories + +# Note - this file must be executed from inside RAILS_ROOT +# otherwise it won't be able to connect to the database + +require 'rubygems' +require 'active_record' +require 'fileutils' + +TIME_FILE = "/var/tmp/gsoc11-mpwa-sync" +ROOT = File.expand_path(File.dirname(__FILE__) + "/../") +NEW_PORTS = "#{ROOT}/bin/new_ports" +PORT_INDEX = "/opt/local/var/macports/sources/rsync.macports.org/release/tarballs/ports" +RAILS_ROOT = "#{ROOT}" +MODE = "production" + +require File.expand_path(RAILS_ROOT + '/app/models/category.rb', __FILE__) +require File.expand_path(RAILS_ROOT + '/app/models/port.rb', __FILE__) + + +if File.exists?(TIME_FILE) + $mtime = File.stat(TIME_FILE).mtime.to_i +else + $mtime = 0 +end + +FileUtils.touch(TIME_FILE) + +# Output encoding magic comment +puts "# coding: UTF-8" + +$ports = Array.new +$hashed_data = Hash.new + +db_info = YAML.load_file(File.expand_path(RAILS_ROOT + '/config/database.yml', __FILE__)) +ActiveRecord::Base.establish_connection(db_info[MODE]) + +fp = IO.popen("#{NEW_PORTS} -m #{$mtime} #{PORT_INDEX}") +new_ports = fp.read.split("\n") + +if new_ports.count > 0 + new_ports << "" #add last blank line +end + +class String + # Escape single quotes + def escape_single_quotes + self.gsub(/'/, "\\\\'") + end +end + +def esc(str) + if not str.nil? + str.escape_single_quotes + else + str + end +end + +new_ports.each do |line| + unless (line == "") + data = line.match(/(\S+):\s+\{?(.+)\}?$/) + unless data.nil? #field missing, should record this if it happens + $hashed_data[data[1].to_sym] = data[2] + end + else + category_name = $hashed_data[:categories].try(:split, " ").try(:[], 0) + $category = Category.find_by_name(category_name) + if $category.nil? + $category = Category.new({:name => category_name}) + puts "category = Category.new({:name => \'#{category_name}\'})" + puts "category.save" + $category.save + end + + port = Port.find_by_name($hashed_data[:name]) + if port.nil? + port = Port.new + end + + port[:name] = $hashed_data[:name] + port[:path] = $hashed_data[:portdir] + port[:version] = $hashed_data[:version] + port[:description] = $hashed_data[:description] + port[:licenses] = $hashed_data[:license] + port[:category_id] = $category.id + port[:variants] = $hashed_data[:variants] + port[:maintainers] = $hashed_data[:maintainers] + port[:platforms] = $hashed_data[:platforms] + port[:categories] = $hashed_data[:categories] + + $ports << [$hashed_data, port] + + puts "port = Port.new" + puts "port[:name] = \'#{esc($hashed_data[:name])}\'" + puts "port[:path] = \'#{esc($hashed_data[:portdir])}\'" + puts "port[:version] = \'#{esc($hashed_data[:version])}\'" + puts "port[:description] = \'#{esc($hashed_data[:description])}\'" + puts "port[:licenses] = \'#{esc($hashed_data[:license])}\'" + puts "port[:category_id] = #{$category.id}" + puts "port[:variants] = \'#{esc($hashed_data[:variants])}\'" + puts "port[:maintainers] = \'#{esc($hashed_data[:maintainers])}\'" + puts "port[:platforms] = \'#{esc($hashed_data[:platforms])}\'" + puts "port[:categories] = \'#{esc($hashed_data[:categories])}\'" + puts "port.save" + + port.save + $hashed_data = {} + end +end Property changes on: branches/gsoc11-statistics/stats-server/bin/generate_seed ___________________________________________________________________ Added: svn:executable + * Added: branches/gsoc11-statistics/stats-server/bin/new_ports =================================================================== --- branches/gsoc11-statistics/stats-server/bin/new_ports (rev 0) +++ branches/gsoc11-statistics/stats-server/bin/new_ports 2014-02-07 19:46:31 UTC (rev 116798) @@ -0,0 +1,140 @@ +#!/bin/sh +# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:filetype=tcl:et:sw=4:ts=4:sts=4 +# Run the Tcl interpreter \ +exec /usr/bin/tclsh "$0" "$@" + +catch {source \ + [file join "/opt/local/share/macports/Tcl" macports1.0 macports_fastload.tcl]} +package require macports +package require Pextlib + +# Globals +array set ui_options [list] +array set global_options [list] +array set global_variations [list] +set port_options [list] + +# Pass global options into mportinit +mportinit ui_options global_options global_variations + +# Standard procedures +proc print_usage args { + global argv0 + puts "Usage: $argv0 \[-d\] -m <time> \<directory\>" + puts "-d:\tOutput debugging information" + puts "-m:\tOutput ports newer than the given mtime" +} + +proc pindex {portdir} { + global target oldfd oldmtime qindex fd directory outdir \ + ui_options port_options + + # try to reuse the existing entry if it's still valid + if {[info exists qindex([string tolower [file tail $portdir]])]} { + try { + set mtime [file mtime [file join $directory $portdir Portfile]] + if {$oldmtime < $mtime} { + set offset $qindex([string tolower [file tail $portdir]]) + seek $oldfd $offset + gets $oldfd line + set name [lindex $line 0] + set len [lindex $line 1] + set line [read $oldfd $len] + array set portinfo $line + + if {[info exists ui_options(ports_debug)]} { + puts "Found entry for $portdir" + } + + foreach field [array names portinfo] { + puts $fd "${field}: $portinfo($field)" + } + puts $fd "" + + return + } + } catch {*} { + ui_warn "failed to open entry for ${portdir}" + } + } +} + +if {[expr $argc > 4]} { + print_usage + exit 1 +} + +for {set i 0} {$i < $argc} {incr i} { + set arg [lindex $argv $i] + switch -regex -- $arg { + {^-.+} { + if {$arg == "-d"} { # Turn on debug output + set ui_options(ports_debug) yes + } elseif {$arg == "-m"} { # output ports newer than mtime + incr i + set oldmtime [lindex $argv $i] + } else { + puts stderr "Unknown option: $arg" + print_usage + exit 1 + } + } + default { + set directory [file join [pwd] $arg] + } + } +} + +if {![info exists directory]} { + set directory . +} + +if {![info exists oldmtime]} { + set oldmtime 0 +} + +# cd to input directory +if {[catch {cd $directory} result]} { + puts stderr "$result" + exit 1 +} else { + set directory [pwd] +} + +# Set output directory to full path +if {[info exists outdir]} { + if {[catch {file mkdir $outdir} result]} { + puts stderr "$result" + exit 1 + } + if {[catch {cd $outdir} result]} { + puts stderr "$result" + exit 1 + } else { + set outdir [pwd] + } +} else { + set outdir $directory +} + +set outpath [file join $outdir PortIndex] +# open old index for comparison +if {[file isfile $outpath] && [file isfile ${outpath}.quick]} { + if {![catch {set oldfd [open $outpath r]}] && ![catch {set quickfd [open ${outpath}.quick r]}]} { + if {![catch {set quicklist [read $quickfd]}]} { + foreach entry [split $quicklist "\n"] { + set qindex([lindex $entry 0]) [lindex $entry 1] + } + } + close $quickfd + } +} else { + set newest 0 +} + +set fd stdout +mporttraverse pindex $directory +if {[info exists oldfd]} { + close $oldfd +} +close $fd Property changes on: branches/gsoc11-statistics/stats-server/bin/new_ports ___________________________________________________________________ Added: svn:executable + * Added: branches/gsoc11-statistics/stats-server/bin/populate.py =================================================================== --- branches/gsoc11-statistics/stats-server/bin/populate.py (rev 0) +++ branches/gsoc11-statistics/stats-server/bin/populate.py 2014-02-07 19:46:31 UTC (rev 116798) @@ -0,0 +1,149 @@ +#!/usr/bin/env python + +# populate.py +# Simulates multiple users submitting realistic data +# Uses curl to submit sample data and test rails submission code + +import random +import string +import uuid +import json +import subprocess +import time +from ports import port_list + +max_users = 250 +max_ports_per_user = 500 + +url_testdeploy = 'http://statsdeploy.heroku.com/submissions' +url_dev = 'http://127.0.0.1:3000/submissions' + +macports_versions = ['1.9.2', '1.9.99', '2.0'] +osx_versions = ['10.4', '10.5', '10.6'] +os_archs = ['i386', 'ppc'] +os_platforms = ['darwin'] +build_archs = ['x86_32', 'x86_64'] +gcc_versions = ['4.2.1', '4.3.6', '4.4.6', '4.5.3', '4.6.1'] +xcode_versions = ['2.5', '3.0', '3.1', '3.2', '4.0'] + +# Generated user ids +users = [] + +# The probability that a new user will be added starts at prob_new_user +prob_new_user = 95 # 95 % + +# Randomly choose entries for each category +def build_os(): + os = {} + os['macports_version'] = random.choice(macports_versions) + os['osx_version'] = random.choice(osx_versions) + os['os_arch'] = random.choice(os_archs) + os['os_platform'] = random.choice(os_platforms) + os['build_arch'] = random.choice(build_archs) + os['gcc_version'] = random.choice(gcc_versions) + os['xcode_version'] = random.choice(xcode_versions) + return os + +# Build up a space separated list of variants +# It selects a random number of variants to include from a list of valid variants +# for a port +def build_variants(variants): + varlist = variants.split() + + size = random.randint(0, len(varlist)) + randomlist = random.sample(varlist, size) + + return " ".join(randomlist) + +# Generate a list of ports for this user. +def build_ports(): + ports = [] + + # Choose a random number between 0 and max_ports_per_user + n_ports = random.randint(0, max_ports_per_user) + + # Generate n_port ports + for i in range(n_ports): + + # Choose a random port from the list of all ports + port = random.choice(port_list) + + # Generate random version strings by appending a digit to the existing version + # eg: 2.2 -> 2.2_6 + # Only append once, check if this port's version has already been modified + if not 'mod' in port: + append = ''.join(random.choice(string.digits) for i in xrange(1)) + port['version'] = port['version'] + '_' + append + port['mod'] = True # Flag that this port's version has been modified + + # Build up a list of variants from all valid variants for this port + port['variants'] = build_variants(port['variants']) + + # Append to the list of ports to submit for this user + ports.append(port) + return ports + +def decay_probability(): + global prob_new_user + + # Over time the probability that a new user will be added decreases as more users participate + decay_factor = 0.0001 + n_users = len(users) + prob_decay = n_users * decay_factor + prob = prob_new_user - prob_decay + + prob_new_user = prob + + # Always keep a minimum 5% chance of growth to simulate users new to + # macports users coming in and participating + if prob <= 5: + prob_new_user = 5 + +def generate_uuid(): + idstr = str(uuid.uuid4()) + users.append(idstr) + + return idstr + +def get_uuid(): + # Check if there are any available uuids + if len(users) == 0: + return generate_uuid() + + decay_probability() + + # Add a new user 'prob_new_user' percent of the time + # This simulates a new user deciding to participate + x = random.uniform(1,100) + print str(prob_new_user) + " n_users = " + str(len(users)) + if x <= prob_new_user: + return generate_uuid() + else: + # Get a random uuid from the list (simulate an existing user updating their info) + uuid = random.choice(users) + return uuid + +def submit(): + #url = url_testdeploy + url = url_dev + idstr = get_uuid() + + data = {} + + data['id'] = idstr + data['os'] = build_os() + data['active_ports'] = build_ports() + + json_enc = json.dumps(data) + args = "-d \'submission[data]=%s\'" % json_enc + pid = subprocess.Popen('curl ' + args + ' ' + url, shell=True) + pid.wait() + +def main(): + random.seed() + for x in range(max_users): + submit() + time.sleep(0.005) + +if __name__ == '__main__': + main() Property changes on: branches/gsoc11-statistics/stats-server/bin/populate.py ___________________________________________________________________ Added: svn:executable + *
participants (1)
-
cal@macports.org