1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

handle missing categories

This commit is contained in:
gnosygnu 2017-03-12 17:24:34 -04:00
parent a57340c461
commit 30b7f64df8
2 changed files with 62 additions and 30 deletions

View File

@ -2,6 +2,10 @@
<head> <head>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" /> <meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<style type="text/css"> <style type="text/css">
.header_div {
align:center;
width:800px; margin:0 auto;
}
.results_div { .results_div {
align:center; align:center;
width:800px; margin:0 auto; width:800px; margin:0 auto;
@ -62,6 +66,10 @@
<script src="wikimedia.js" type='text/javascript'></script> <script src="wikimedia.js" type='text/javascript'></script>
</head> </head>
<body onload="wm.category.run();"> <body onload="wm.category.run();">
Please wait. Generating... Example urls:
<ul>
<li><a href='http://xowa.org/wikimedia.html?domain=en.wikipedia.org&category=Earth'>http://xowa.org/wikimedia.html?domain=en.wikipedia.org&category=Earth</a></li>
<li><a href='http://xowa.org/wikimedia.html?domain=en.wikipedia.org&category=Coffee'>http://xowa.org/wikimedia.html?domain=en.wikipedia.org&category=Coffee</a></li>
</ul>
</body> </body>
</html> </html>

View File

@ -1,6 +1,6 @@
/* /*
Given a domain and a category, list its member pages and readability score Given a domain and a category, list its member pages and readability score
EX: http://xowa.org/wikimedia.html?domain=en.wikipedia.org&page=Earth EX: http://xowa.org/wikimedia.html?domain=en.wikipedia.org&category=Earth
*/ */
(function (wm) { (function (wm) {
wm.category = new function() { wm.category = new function() {
@ -13,11 +13,11 @@
// wikimedia domain; EX: en.wikipedia.org // wikimedia domain; EX: en.wikipedia.org
this.domain = 'en.wikipedia.org'; this.domain = 'en.wikipedia.org';
// array of categories // array of pages
this.categories = []; this.pages = [];
// number of pages in category // number of pages in category
this.categoriesTotal = 0; this.pagesTotal = 0;
// number of excerpts found // number of excerpts found
this.excerptsFound = 0; this.excerptsFound = 0;
@ -25,6 +25,9 @@
// maximum number of excerpts to find // maximum number of excerpts to find
this.excerptsMax = 50; this.excerptsMax = 50;
// member variable for category
this.category_title = '';
// ********************************************** // **********************************************
// main entry function // main entry function
// ********************************************** // **********************************************
@ -33,14 +36,17 @@
// parse url to get domain and page // parse url to get domain and page
var url = window.location.href; var url = window.location.href;
var domain = wm.category.getQueryArg(url, 'domain'); var domain = wm.category.getQueryArg(url, 'domain');
var category = wm.category.getQueryArg(url, 'category'); wm.category.category_title = wm.category.getQueryArg(url, 'category');
// use domain arg if available; otherwise use default // use domain arg if available; otherwise use default
if (domain) if (domain)
wm.category.domain = domain; wm.category.domain = domain;
// write status
wm.category.writeHtml('<div class="header_div">Evaluating Category:' + wm.category.category_title + '. Please wait...</div>');
// find pages in category // find pages in category
wm.category.findPagesInCategory(wm.category.domain, category); wm.category.findPagesInCategory(wm.category.domain, wm.category.category_title);
}, 100); }, 100);
} }
@ -51,10 +57,10 @@
} }
name = name.replace(/[\[\]]/g, "\\$&"); name = name.replace(/[\[\]]/g, "\\$&");
var regex = new RegExp("[?&]" + name + "(=([^&#]*)|&|#|$)"), var regex = new RegExp("[?&]" + name + "(=([^&#]*)|&|#|$)"),
categories = regex.exec(url); pages = regex.exec(url);
if (!categories) return null; if (!pages) return null;
if (!categories[2]) return ''; if (!pages[2]) return '';
return decodeURIComponent(categories[2].replace(/\+/g, " ")); return decodeURIComponent(pages[2].replace(/\+/g, " "));
} }
// ********************************************** // **********************************************
@ -67,8 +73,9 @@
wm.category.runAjax(url, wm.category.findPagesInCategoryCallback); wm.category.runAjax(url, wm.category.findPagesInCategoryCallback);
} }
else { else {
// var root = {"query":{"categorymembers":[{"pageid":9228,"ns":0,"title":"Earth"},{"pageid":51506837,"ns":0,"title":"Outline of Earth"},{"pageid":25287133,"ns":0,"title":"Anywhere on Earth"},{"pageid":174069,"ns":0,"title":"Asteroid impact avoidance"},{"pageid":35971482,"ns":0,"title":"Day length fluctuations"},{"pageid":33256286,"ns":0,"title":"Demographics of the world"},{"pageid":19509955,"ns":0,"title":"Earth in culture"},{"pageid":212485,"ns":0,"title":"Earth religion"},{"pageid":944638,"ns":0,"title":"Earth's energy budget"},{"pageid":41077022,"ns":0,"title":"Earth's internal heat budget"}]}}; // var root = {"query":{"categorymembers":[{"pageid":9228,"ns":0,"title":"Earth"},{"pageid":51506837,"ns":0,"title":"Outline of Earth"},{"pageid":25287133,"ns":0,"title":"Anywhere on Earth"},{"pageid":174069,"ns":0,"title":"Asteroid impact avoidance"},{"pageid":35971482,"ns":0,"title":"Day length fluctuations"},{"pageid":33256286,"ns":0,"title":"Demographics of the world"},{"pageid":19509955,"ns":0,"title":"Earth in culture"},{"pageid":212485,"ns":0,"title":"Earth religion"},{"pageid":944638,"ns":0,"title":"Earth's energy budget"},{"pageid":41077022,"ns":0,"title":"Earth's internal heat budget"}]}};
var root = {"query":{"categorymembers":[{"pageid":9228,"ns":0,"title":"Earth"}]}}; var root = {"query":{"categorymembers":[{"pageid":9228,"ns":0,"title":"Earth"}]}};
// var root = {"query":{"categorymembers":[]}};
wm.category.findPagesInCategoryCallbackRoot(root); wm.category.findPagesInCategoryCallbackRoot(root);
} }
} }
@ -90,18 +97,23 @@
var ns = category.ns; var ns = category.ns;
var title = category.title; var title = category.title;
// populate local categories table // populate local pages table
wm.category.categories[page_id] = category; wm.category.pages[page_id] = category;
// increment total // increment total
wm.category.categoriesTotal++; wm.category.pagesTotal++;
// assign score // assign score
category.score = 'N/A'; category.score = 'N/A';
} }
// now, get excerpts if (wm.category.pagesTotal == 0) {
wm.category.getExcerpts(); wm.category.writeHtml("No results found for Category:" + wm.category.category_title);
}
else {
// get excerpts
wm.category.getExcerpts();
}
} }
// ********************************************** // **********************************************
@ -110,13 +122,13 @@
this.getExcerpts = function() { this.getExcerpts = function() {
// loop each page to get excerpt // loop each page to get excerpt
var excerptsCount = 0; var excerptsCount = 0;
for (var page_id in wm.category.categories) { for (var page_id in wm.category.pages) {
var category = wm.category.categories[page_id]; var category = wm.category.pages[page_id];
// exit if too many // exit if too many
if (excerptsCount++ >= wm.category.excerptsMax) { if (excerptsCount++ >= wm.category.excerptsMax) {
// NOTE: must update categoriesTotal // NOTE: must update pagesTotal
wm.category.categoriesTotal = wm.category.excerptsMax; wm.category.pagesTotal = wm.category.excerptsMax;
break; break;
} }
@ -150,7 +162,7 @@
var score = wm.category.calcReadabilityScore(page.title, excerpt); var score = wm.category.calcReadabilityScore(page.title, excerpt);
// update local category // update local category
var category = wm.category.categories[page_id]; var category = wm.category.pages[page_id];
category.excerpt = excerpt; category.excerpt = excerpt;
category.score = score[0]; category.score = score[0];
category.totalSentences = score[1]; category.totalSentences = score[1];
@ -160,7 +172,7 @@
console.log(JSON.stringify(category)); console.log(JSON.stringify(category));
// if last category, print all // if last category, print all
if (++wm.category.excerptsFound == wm.category.categoriesTotal) { if (++wm.category.excerptsFound == wm.category.pagesTotal) {
wm.category.printResults(); wm.category.printResults();
} }
} }
@ -239,18 +251,22 @@
// ********************************************** // **********************************************
this.printResults = function() { this.printResults = function() {
// sort results by score // sort results by score
wm.category.categories.sort(wm.category.compareResult); wm.category.pages.sort(wm.category.compareResult);
// generate string // generate string
var s var s
= '<div class="results_div">\n' = '<div class="header_div">\n'
+ ' <div>Readability scores for member pages of ' + wm.category.buildWikiLink(wm.category.domain, 'Category:' + wm.category.category_title) + ' in ' + wm.category.domain + '</div>\n'
+ '</div>\n'
+ '<br/>\n'
+ '<div class="results_div">\n'
+ ' <div class="result_div">\n' + ' <div class="result_div">\n'
+ ' <div class="result_title result_header">Title</div>\n' + ' <div class="result_title result_header">Title</div>\n'
+ ' <div class="result_score result_header">Score</div>\n' + ' <div class="result_score result_header">Score</div>\n'
+ ' </div>' + ' </div>'
; ;
for (var page_id in wm.category.categories) { for (var page_id in wm.category.pages) {
var category = wm.category.categories[page_id]; var category = wm.category.pages[page_id];
// get category_title for url // get category_title for url
var page_enc = category.title.replace(/ /g, '_'); var page_enc = category.title.replace(/ /g, '_');
@ -266,7 +282,7 @@
} }
s += ' <div class="result_div">\n' s += ' <div class="result_div">\n'
+ ' <div class="result_title tooltip"><a href="https://' + wm.category.domain + '/wiki/' + page_enc + '">' + category.title + '</a>\n' + ' <div class="result_title tooltip">' + wm.category.buildWikiLink(wm.category.domain, category.title) + '\n'
+ ' <span class="tooltiptext">\n' + ' <span class="tooltiptext">\n'
+ ' Sentences: ' + category.totalSentences + '<br/>\n' + ' Sentences: ' + category.totalSentences + '<br/>\n'
+ ' Words: ' + category.totalWords + '<br/>\n' + ' Words: ' + category.totalWords + '<br/>\n'
@ -281,7 +297,12 @@
s += '</div>'; s += '</div>';
// print string // print string
document.body.innerHTML = s; wm.category.writeHtml(s);
}
this.buildWikiLink = function(wiki, page) {
var page_enc = page.replace(/ /g, '_');
page_enc = encodeURI(page_enc);
return '<a href="https://' + wiki + '/wiki/' + page_enc + '">' + page + '</a>'
} }
this.compareResult = function(lhs, rhs) { this.compareResult = function(lhs, rhs) {
// sort from least readable to most readable // sort from least readable to most readable
@ -297,5 +318,8 @@
xhr.onreadystatechange = callback; xhr.onreadystatechange = callback;
xhr.send(); xhr.send();
} }
this.writeHtml = function(html) {
document.body.innerHTML = html;
}
} }
}(window.wm = window.wm || {})); }(window.wm = window.wm || {}));