Tuesday, September 16, 2014
   
Text Size
Login

SQL Queries for Statistics

Basically, I've started using so much more SQL in our new Business Intelligence projects that I've been revising all my scripts to see what SQL I can optimize.

One of my systems is a MediaWiki CMS which is used for the official Bournemouth University Knowledge Base. The skin itself is the index page loaded for any page in the Wiki system. It logs the IP address (VisitorIP), the URL (VisitorURL) entered via the browser useragent (VisitorUAgent), the User ID (VisitorID, 0 if not logged in) and of course the Timestamp (DateTimeStamp).

Here is a page of some MySQL queries I can do based on just those 5 columns:


MediaWiki: number of unique guests, students and staff by month. Also associates a UserID to the wikimedia_user table to identify a user.

  1.  SELECT 
  2.       CONCAT(MONTHNAME(t2.Date), ' 'YEAR(t2.Date)) Month, 
  3.       SUM(IF(t2.VisitorType='Staff', 1, 0)) Staff, 
  4.       SUM(IF(t2.VisitorType='Student', 1, 0)) Student, 
  5.       SUM(IF(t2.VisitorType='Anonymous', 1, 0)) Guests 
  6.  FROM 
  7.       ( 
  8.       SELECT 
  9.            b.user_name User, 
  10.            CASE 
  11.                 WHEN INSTR(b.user_name, '@staff'THEN 'Staff' 
  12.                 WHEN INSTR(b.user_name, '@bournemouth'THEN 'Student' 
  13.                 WHEN t1.ID=0 THEN 'Anonymous' 
  14.            END VisitorType, 
  15.            t1.Date Date 
  16.       FROM ( 
  17.            SELECT 
  18.                 MAX(a.VisitorID) ID, 
  19.                 a.VisitorIP IP, 
  20.                 a.DateTimeStamp Date 
  21.            FROM 
  22.                 custombu_stats_visits a 
  23.            WHERE 
  24.                 YEAR(a.DateTimeStamp)='2010' 
  25.            GROUP BY 
  26.                 a.VisitorIP 
  27.            ORDER BY 
  28.                 MAX(a.VisitorID) DESC 
  29.            ) t1 
  30.       LEFT OUTER JOIN 
  31.            wikimedia_user b ON t1.ID=b.user_id 
  32.       GROUP BY 
  33.            t1.IP 
  34.       ORDER BY 
  35.            b.user_name 
  36.       ) t2 
  37.  GROUP BY 
  38.       MONTH(t2.Date); 
Notes: I use this in a PHP script so where year is specified (2010), I have a variable posted from a HTML form in my PHP. Also note: The association to a user can change because anonymous guests count as guests until they login, which is when they are associated to a UserID and identified as staff or student.


Number of total hits aggregated by month with on-peak/off-peak.

  1.  SELECT 
  2.       CONCAT(MONTHNAME(a.DateTimeStamp), ' 'YEAR(a.DateTimeStamp)) Month, 
  3.       SUM(IF((HOUR(a.DateTimeStamp) BETWEEN 8 AND 18), 1, 0)) 'Worktime'
  4.       SUM(IF((HOUR(a.DateTimeStamp) BETWEEN 8 AND 18), 0, 1)) 'Out-of-Hours'
  5.       COUNT(VisitorIP) 'Total Hits' 
  6.  FROM 
  7.       custombu_stats_visits a 
  8.  WHERE 
  9.       YEAR(a.DateTimeStamp)='2010' 
  10.  GROUP BY 
  11.       CONCAT(YEAR(a.DateTimeStamp), '-'MONTH(a.DateTimeStamp)) 
  12.  ORDER BY 
  13.       a.DateTimeStamp 

Operating Systems used aggregated by month

  1.  SELECT  
  2.       CONCAT(MONTHNAME(t1.Date), ' 'YEAR(t1.Date)) Month,  
  3.       SUM(IF(t1.VisitorOS='Windows', 1, 0)) 'Windows',  
  4.       SUM(IF(t1.VisitorOS='Mac', 1, 0)) 'Mac',  
  5.       SUM(IF(t1.VisitorOS='Linux', 1, 0)) 'Linux',  
  6.       SUM(IF(t1.VisitorOS='Other', 1, 0)) 'Other',  
  7.       COUNT(t1.VisitorOS) 'Total'  
  8.  FROM ( 
  9.       SELECT      
  10.            a.DateTimeStamp Date,  
  11.            CASE  
  12.                 WHEN INSTR(a.VisitorUAgent, 'WinNT'THEN 'Windows'  
  13.                 WHEN INSTR(a.VisitorUAgent, 'Windows'THEN 'Windows'  
  14.                 WHEN INSTR(a.VisitorUAgent, 'WinNT'THEN 'Windows'  
  15.                 WHEN INSTR(a.VisitorUAgent, 'Mac_PowerPC'THEN 'Mac'  
  16.                 WHEN INSTR(a.VisitorUAgent, 'Macintosh'THEN 'Mac'  
  17.                 WHEN INSTR(a.VisitorUAgent, 'X11'THEN 'Linux'  
  18.                 WHEN INSTR(a.VisitorUAgent, 'Linux'THEN 'Linux'  
  19.                 ELSE 'Other'  
  20.            END VisitorOS,  
  21.            COUNT(a.VisitorUAgent) 'Total'  
  22.       FROM  
  23.            custombu_stats_visits a  
  24.       WHERE  
  25.            YEAR(a.DateTimeStamp)='2010'  
  26.       GROUP BY      
  27.            a.VisitorIP  
  28.       ORDER BY  
  29.            a.DateTimeStamp DESC 
  30.       ) t1  
  31.  GROUP BY  
  32.       CONCAT(MONTH(t1.Date), ' 'YEAR(t1.Date)) 


Browsers used aggregated by month

  1.  SELECT 
  2.       CONCAT(MONTHNAME(t1.Date), ' 'YEAR(t1.Date)) Month, 
  3.       SUM(IF(t1.VisitorBrowser='MSIE', 1, 0)) 'IE'
  4.       SUM(IF(t1.VisitorBrowser='Firefox', 1, 0)) 'FF'
  5.       SUM(IF(t1.VisitorBrowser='Safari', 1, 0)) 'CS'
  6.       SUM(IF(t1.VisitorBrowser='Smartphone', 1, 0)) 'Smart'
  7.       SUM(IF(t1.VisitorBrowser='Other', 1, 0)) 'Other'
  8.       COUNT(t1.VisitorBrowser) 'Total' 
  9.  FROM 
  10.  (SELECT 
  11.       a.DateTimeStamp Date, 
  12.       CASE 
  13.            WHEN INSTR(a.VisitorUAgent, 'MSIE 'THEN 'MSIE' 
  14.            WHEN INSTR(a.VisitorUAgent, 'Firefox'THEN 'Firefox' 
  15.            WHEN INSTR(a.VisitorUAgent, 'Safari'THEN 'Safari' 
  16.            WHEN INSTR(a.VisitorUAgent, 'Chrome'THEN 'Safari' 
  17.            WHEN INSTR(a.VisitorUAgent, 'BlackBerry'THEN 'Smartphone' 
  18.            WHEN INSTR(a.VisitorUAgent, 'iPhone'THEN 'Smartphone' 
  19.            WHEN INSTR(a.VisitorUAgent, 'Android'THEN 'Smartphone' 
  20.            WHEN INSTR(a.VisitorUAgent, 'NetFront'THEN 'Smartphone' 
  21.            WHEN INSTR(a.VisitorUAgent, 'SonyEricsson'THEN 'Smartphone' 
  22.            ELSE 'Other' 
  23.       END VisitorBrowser 
  24.  FROM 
  25.       custombu_stats_visits a 
  26.  WHERE 
  27.       YEAR(a.DateTimeStamp)='2010' 
  28.  GROUP BY 
  29.       a.VisitorIP 
  30.  ORDER BY 
  31.       a.DateTimeStamp DESC) t1 
  32.  GROUP BY 
  33.       CONCAT(MONTH(t1.Date), ' 'YEAR(t1.Date)) 


Number of actions aggregated by top 10 users this month

  1.  SELECT 
  2.       MONTH(a.DateTimeStamp) Month, 
  3.       SUM(IF(INSTR(a.VisitorURL, '?'), 0, 1)) Views, 
  4.       SUM(IF(INSTR(a.VisitorURL, 'Special:Search'), 1, 0)) Searches, 
  5.       SUM(IF(INSTR(a.VisitorURL, 'create=Add'), 1, 0)) 'Pages Created'
  6.       SUM(IF(INSTR(a.VisitorURL, 'action=edit'), 1, 0)) 'Pages Edited'
  7.       SUM(IF(INSTR(a.VisitorURL, 'Special:Move'), 1, 0)) 'Pages Renamed'
  8.       SUM(IF(INSTR(a.VisitorURL, 'Kerberos_Login?returnto'), 1, 0)) 'Logins'
  9.       SUM(IF(INSTR(a.VisitorURL, 'action=delete'), 1, 0)) 'Pages Deleted'
  10.       ( COUNT(a.VisitorID) - SUM(IF(INSTR(a.VisitorURL, '?'), 0, 1)) - SUM(IF(INSTR(a.VisitorURL, 'Special:Search'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'create=Add'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'action=edit'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'action=delete'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'Kerberos_Login?returnto'), 1, 0)) - SUM(IF(INSTR(a.VisitorURL, 'Special:Move'), 1, 0)) ) 'Other Actions'
  11.       COUNT(a.VisitorID) 'Total Actions'
  12.       a.VisitorIP IP, 
  13.       b.user_id UserID, 
  14.       b.user_name Name 
  15.  FROM 
  16.       custombu_stats_visits a, 
  17.       wikimedia_user b 
  18.  WHERE 
  19.       a.VisitorID=b.user_id 
  20.  AND 
  21.       a.DateTimeStamp BETWEEN CONCAT('2010''-'MONTH(NOW()), '-01'
  22.  AND 
  23.       CONCAT('2010''-'MONTH(NOW()), '-'DAYOFMONTH(NOW())) 
  24.  GROUP BY 
  25.       b.user_name 
  26.  ORDER BY 
  27.       COUNT(a.VisitorID) DESC 
  28.  LIMIT 
  29.       0,10; 
Add Comment

Name:

Email:

Website:

Message:


Latest Posts