1
0
mirror of https://github.com/gnosygnu/xowa.git synced 2024-10-27 20:34:16 +00:00

v4.5.0.1703: app_update

This commit is contained in:
gnosygnu 2017-03-29 21:22:25 -04:00
parent 4999f70727
commit 891ce4b0d2
3 changed files with 1 additions and 405 deletions

View File

@ -1 +1 @@
{"version_id":"520"} {"version_id":"521"}

View File

@ -1,75 +0,0 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=UTF-8" />
<style type="text/css">
/* Header and results containers: fixed width, centred horizontally by the auto margins. */
.header_div {
  width:800px; margin:0 auto;
}
.results_div {
  width:800px; margin:0 auto;
  border-bottom: 1px solid #ddd;
  border-left: 1px solid #ddd;
  border-right: 1px solid #ddd;
}
/* Column-heading cells of the results table */
.result_header {
  font-weight:bold;
  background-color: #eee;
}
/* One row of the results table; flex lays the title and score cells side by side */
.result_div {
  display:flex;
  border-top: 1px solid #ddd;
}
.result_title {
  width:100%;
  padding:2px;
}
.result_score {
  width:80px;
  padding:2px;
  text-align:right;
  border-left: 1px solid #ddd;
}
.result_hover {
  width:80px;
  padding:2px;
  text-align:center;
  border-left: 1px solid #ddd;
}
/* REF:https://www.w3schools.com/css/css_tooltip.asp */
.tooltip {
  position: relative;
  display: inline-block;
}
/* Tooltip body: hidden until the enclosing .tooltip element is hovered */
.tooltip .tooltiptext {
  visibility: hidden;
  width: 100%;
  background-color: white;
  color: black;
  border: 1px solid black;
  padding: 5px;
  /* Position the tooltip */
  position: absolute;
  z-index: 1;
}
.tooltip:hover .tooltiptext {
  visibility: visible;
}
</style>
<script src="wikimedia.js" type='text/javascript'></script>
</head>
<!-- wm.category.run() reads the "domain" and "category" query args; with a category it replaces this body with the results -->
<body onload="wm.category.run();">
Example urls:
<ul>
<li><a href='http://xowa.org/demo/wikimedia.html?domain=en.wikipedia.org&amp;category=Earth'>http://xowa.org/demo/wikimedia.html?domain=en.wikipedia.org&amp;category=Earth</a></li>
<li><a href='http://xowa.org/demo/wikimedia.html?domain=en.wikipedia.org&amp;category=Coffee'>http://xowa.org/demo/wikimedia.html?domain=en.wikipedia.org&amp;category=Coffee</a></li>
</ul>
</body>
</html>

View File

@ -1,329 +0,0 @@
/*
Given a domain and a category, list its member pages and readability score
EX: http://xowa.org/wikimedia.html?domain=en.wikipedia.org&category=Earth
*/
(function (globalRoot) {
    'use strict';
    // attach to the shared `wm` namespace, creating it on first use
    var wm = globalRoot.wm = globalRoot.wm || {};

    // score returned when an excerpt is too short to be evaluated; printed as 'N/A'
    var SCORE_UNAVAILABLE = 999;

    // escape text so that it can be embedded in HTML markup or a quoted attribute
    function escapeHtml(text) {
        return String(text)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    // true if `domain` looks like a bare host name with an optional port (no path, query or credentials)
    function isValidDomain(domain) {
        return /^[a-z0-9](?:[a-z0-9.-]*[a-z0-9])?(?::\d+)?$/i.test(domain);
    }

    // parse JSON; returns null instead of throwing on malformed input
    function parseJson(text) {
        try {
            return JSON.parse(text);
        }
        catch (e) {
            return null;
        }
    }

    // true if a sentence fragment is long enough to count; 5 is a heuristic for maximum length of acronym
    function isCountableSentence(fragment) {
        return fragment.trim().length >= 5;
    }

    // numeric score used for sorting; anything that is not a finite number sorts like an unavailable score
    function sortableScore(page) {
        var score = page ? page.score : null;
        return (typeof score === 'number' && isFinite(score)) ? score : SCORE_UNAVAILABLE;
    }

    wm.category = new function() {
        // **********************************************
        // member variables
        // **********************************************
        // true: call the live api; false: use the canned fixtures below
        this.production = true;
        // wikimedia domain; EX: en.wikipedia.org
        this.domain = 'en.wikipedia.org';
        // dense array of pages in the category
        this.pages = [];
        // same pages keyed by pageid, for matching excerpt responses to pages
        this.pagesById = {};
        // number of pages being evaluated
        this.pagesTotal = 0;
        // number of excerpt requests that have finished
        this.excerptsFound = 0;
        // maximum number of excerpts to find
        this.excerptsMax = 50;
        // member variable for category
        this.category_title = '';

        // **********************************************
        // main entry function
        // **********************************************
        // reads "domain" and "category" from the page url and starts the evaluation
        this.run = function() {
            setTimeout(function() {
                // parse url to get domain and page
                var url = window.location.href;
                var domain = wm.category.getQueryArg(url, 'domain');
                wm.category.category_title = wm.category.getQueryArg(url, 'category');

                // use domain arg if available and well-formed; otherwise use default.
                // NOTE: the arg is untrusted and becomes the host of every api request
                if (domain && isValidDomain(domain))
                    wm.category.domain = domain;

                // handle bare url
                if (!wm.category.category_title)
                    return;

                // write status; NOTE: category comes from the url, so it must be escaped
                wm.category.writeHtml('<div class="header_div">Evaluating Category:' + escapeHtml(wm.category.category_title) + '. Please wait...</div>');

                // find pages in category
                wm.category.findPagesInCategory(wm.category.domain, wm.category.category_title);
            }, 100);
        };

        // returns the decoded value of query arg `name` in `url` (defaults to the page url);
        // null if the arg is absent; '' if it is present without a value
        this.getQueryArg = function(url, name) {
            // REF: http://stackoverflow.com/questions/901115/how-can-i-get-query-string-values-in-javascript
            if (!url) {
                url = window.location.href;
            }
            // escape every regex metacharacter so that the name is matched literally
            var escapedName = name.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
            var regex = new RegExp('[?&]' + escapedName + '(=([^&#]*)|&|#|$)');
            var match = regex.exec(url);
            if (!match) return null;
            if (!match[2]) return '';
            var raw = match[2].replace(/\+/g, ' ');
            try {
                return decodeURIComponent(raw);
            }
            catch (e) {
                // malformed percent-escape: fall back to the raw text rather than aborting
                return raw;
            }
        };

        // **********************************************
        // find page in category
        // **********************************************
        this.findPagesInCategory = function(domain, category) {
            // run ajax; NOTE: must specify origin to bypass CORS; http://stackoverflow.com/a/38921370
            if (wm.category.production) {
                var url = 'https://' + domain + '/w/api.php?action=query&format=json&formatversion=2&origin=*&list=categorymembers'
                    + '&cmlimit=' + encodeURIComponent(wm.category.excerptsMax)
                    + '&cmtitle=' + encodeURIComponent('Category:' + category);
                wm.category.runAjax(url, wm.category.findPagesInCategoryCallback);
            }
            else {
                // canned response for test mode
                var root = {"query":{"categorymembers":[{"pageid":9228,"ns":0,"title":"Earth"}]}};
                wm.category.findPagesInCategoryCallbackRoot(root);
            }
        };

        // XMLHttpRequest readystatechange handler for the category-members request
        this.findPagesInCategoryCallback = function() {
            if (this.readyState != 4) return;
            if (this.status != 200) {
                // report the failure instead of leaving the "Please wait..." message up forever
                wm.category.writeHtml('Could not load Category:' + escapeHtml(wm.category.category_title) + ' (HTTP status ' + escapeHtml(this.status) + ')');
                return;
            }
            wm.category.findPagesInCategoryCallbackRoot(parseJson(this.responseText));
        };

        // registers every member of the api response `root`, then requests the excerpts
        this.findPagesInCategoryCallbackRoot = function(root) {
            // a malformed or error response has no member list; treat it as an empty category
            var members = (root && root.query && root.query.categorymembers) || [];

            // start from a clean slate so that a second run does not accumulate
            wm.category.pages = [];
            wm.category.pagesById = {};
            wm.category.pagesTotal = 0;
            wm.category.excerptsFound = 0;

            // loop each page in category
            for (var i = 0; i < members.length; i++) {
                var page = members[i];
                // placeholder score until the excerpt arrives
                page.score = 'N/A';
                // populate local pages table
                wm.category.pages.push(page);
                wm.category.pagesById[page.pageid] = page;
                // increment total
                wm.category.pagesTotal++;
            }

            if (wm.category.pagesTotal == 0) {
                wm.category.writeHtml("No results found for Category:" + escapeHtml(wm.category.category_title));
            }
            else {
                // get excerpts
                wm.category.getExcerpts();
            }
        };

        // **********************************************
        // get excerpts
        // **********************************************
        this.getExcerpts = function() {
            // evaluate at most excerptsMax pages; NOTE: must update pagesTotal so that completion is detected
            var batch = wm.category.pages.slice(0, wm.category.excerptsMax);
            wm.category.pages = batch;
            wm.category.pagesTotal = batch.length;
            if (batch.length == 0) {
                wm.category.printResults();
                return;
            }

            // loop each page to get excerpt
            for (var i = 0; i < batch.length; i++) {
                // run ajax; NOTE: must specify origin to bypass CORS; http://stackoverflow.com/a/38921370
                if (wm.category.production) {
                    var url = 'https://' + wm.category.domain + '/w/api.php?action=query&format=json&formatversion=2&origin=*&prop=extracts&exintro=1&explaintext'
                        + '&titles=' + encodeURIComponent(batch[i].title);
                    wm.category.runAjax(url, wm.category.getExcerptCallback);
                }
                else {
                    // canned response for test mode
                    var root = {"query":{"pages":
                    [
                        {"pageid":9228,"ns":0,"title":"Earth","extract":"Earth (Greek: Γαῖα Gaia; Latin: Terra)."}
                    ]}};
                    wm.category.getExcerptCallbackRoot(root);
                }
            }
        };

        // XMLHttpRequest readystatechange handler for one excerpt request
        this.getExcerptCallback = function() {
            if (this.readyState != 4) return;
            // a failed request is passed on as null so that it still counts as finished
            var root = this.status == 200 ? parseJson(this.responseText) : null;
            wm.category.getExcerptCallbackRoot(root);
        };

        // stores the excerpt and score of one api response; prints the results once every request has finished
        this.getExcerptCallbackRoot = function(root) {
            // get variables
            var page = root && root.query && root.query.pages && root.query.pages[0]; // only 1 page per api call
            var category = page ? wm.category.pagesById[page.pageid] : null;

            if (category) {
                // a page without an extract is scored as empty text
                var excerpt = typeof page.extract === 'string' ? page.extract : '';

                // calc readability score
                var score = wm.category.calcReadabilityScore(page.title, excerpt);

                // update local category
                category.excerpt = excerpt;
                category.score = score[0];
                category.totalSentences = score[1];
                category.totalWords = score[2];
                category.totalSyllables = score[3];
            }

            // if last category, print all; NOTE: unusable responses are counted too, else the results would never print
            if (++wm.category.excerptsFound == wm.category.pagesTotal) {
                wm.category.printResults();
            }
        };

        // **********************************************
        // calc readability
        // **********************************************
        // returns [score, totalSentences, totalWords, totalSyllables] for text `s`;
        // returns [999, 0, 0, 0] when the text is too short to score. `title` is not used
        this.calcReadabilityScore = function(title, s) {
            // REF: https://en.wikipedia.org/wiki/Flesch%E2%80%93Kincaid_readability_tests
            // count words and sentences
            var words = wm.category.toWordArray(s);
            var totalWords = words.length;
            var totalSentences = wm.category.countSentences(s);

            // fewer than 2 words or no countable sentence: the formula would divide by zero
            if (totalWords <= 1 || totalSentences == 0) return [SCORE_UNAVAILABLE, 0, 0, 0];

            // count syllables
            var totalSyllables = 0;
            for (var i = 0; i < totalWords; i++) {
                totalSyllables += wm.category.countSyllablesInWord(words[i]);
            }

            // calc score: Flesch reading-ease formula; see REF above
            var score = 206.835 - (1.015 * (totalWords / totalSentences)) - (84.6 * (totalSyllables / totalWords));
            return [score, totalSentences, totalWords, totalSyllables];
        };

        // splits `s` into words on any run of whitespace (including newlines); empty text gives []
        this.toWordArray = function(s) {
            var trimmed = String(s == null ? '' : s).replace(/^\s+|\s+$/g, '');
            return trimmed.length == 0 ? [] : trimmed.split(/\s+/);
        };

        // counts sentences in `s`: a sentence ends at '.', '?' or '!' that follows a word character
        // and precedes whitespace or end of text; trailing text without a terminator counts as well
        this.countSentences = function(s) {
            var text = String(s == null ? '' : s);
            var terminator = /\w[.?!](?=\s|$)/g;
            var count = 0;
            var start = 0;
            var match;
            while ((match = terminator.exec(text)) !== null) {
                // keep the final word character, drop the punctuation mark
                if (isCountableSentence(text.substring(start, match.index + 1))) count++;
                start = match.index + 2;
            }
            // remainder after the last terminator; note that "Yes." leaves "" which is ignored
            if (isCountableSentence(text.substring(start))) count++;
            return count;
        };

        // estimates the number of syllables in `word`; always at least 1
        this.countSyllablesInWord = function(word) {
            // REF: http://stackoverflow.com/questions/5686483/how-to-compute-number-of-syllables-in-a-word-in-javascript
            var lowered = String(word).toLowerCase();
            if (lowered.length <= 3) return 1;
            var stem = lowered
                .replace(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '') // drop endings treated as silent
                .replace(/^y/, '');                              // leading y is not counted as a vowel
            var vowelGroups = stem.match(/[aeiouy]{1,2}/g);
            return vowelGroups == null ? 1 : vowelGroups.length;
        };

        // **********************************************
        // printResults
        // **********************************************
        this.printResults = function() {
            // sort results by score
            var pages = wm.category.pages;
            pages.sort(wm.category.compareResult);

            // generate string; NOTE: all text from the url or the api is escaped before being embedded
            var s
                = '<div class="header_div">\n'
                + '  <div>Readability scores for member pages of ' + wm.category.buildWikiLink(wm.category.domain, 'Category:' + wm.category.category_title) + ' in ' + escapeHtml(wm.category.domain) + '</div>\n'
                + '</div>\n'
                + '<br/>\n'
                + '<div class="results_div">\n'
                + '  <div class="result_div">\n'
                + '    <div class="result_title result_header">Title</div>\n'
                + '    <div class="result_score result_header">Score</div>\n'
                + '  </div>'
                ;
            for (var i = 0; i < pages.length; i++) {
                var page = pages[i];
                if (!page) continue;

                // get score; the sentinel and any non-numeric placeholder are shown as N/A
                var score = page.score;
                if (typeof score === 'number' && isFinite(score) && score !== SCORE_UNAVAILABLE) {
                    score = score.toFixed(2);
                }
                else {
                    score = 'N/A';
                }

                s += '  <div class="result_div">\n'
                  +  '    <div class="result_title tooltip">' + wm.category.buildWikiLink(wm.category.domain, page.title) + '\n'
                  +  '      <span class="tooltiptext">\n'
                  +  '        Sentences: ' + (page.totalSentences || 0) + '<br/>\n'
                  +  '        Words: ' + (page.totalWords || 0) + '<br/>\n'
                  +  '        Syllables: ' + (page.totalSyllables || 0) + '<br/>\n'
                  +  '        <br/>\n'
                  +  escapeHtml(page.excerpt == null ? '' : page.excerpt)
                  +  '      </span>\n'
                  +  '    </div>\n'
                  +  '    <div class="result_score">' + score + '</div>\n'
                  +  '  </div>\n';
            }
            s += '</div>';

            // print string
            wm.category.writeHtml(s);
        };

        // returns an html anchor to `page` on `wiki`; spaces become underscores as in wiki urls
        this.buildWikiLink = function(wiki, page) {
            // encode everything unsafe in a url path, but keep ':' and '/' readable (namespaces, subpages)
            var page_enc = encodeURIComponent(String(page).replace(/ /g, '_'))
                .replace(/%3A/gi, ':')
                .replace(/%2F/gi, '/');
            return '<a href="https://' + escapeHtml(wiki) + '/wiki/' + page_enc + '">' + escapeHtml(page) + '</a>';
        };

        // sort comparator: ascending by score
        this.compareResult = function(lhs, rhs) {
            return sortableScore(lhs) - sortableScore(rhs);
        };

        // **********************************************
        // utility
        // **********************************************
        // issues an async GET for `url`; `callback` is installed as the readystatechange handler
        this.runAjax = function(url, callback) {
            var xhr = new XMLHttpRequest();
            xhr.open("GET", url, true);
            xhr.onreadystatechange = callback;
            xhr.send();
        };

        // replaces the entire document body with `html`; callers must pass escaped markup
        this.writeHtml = function(html) {
            document.body.innerHTML = html;
        };
    };
}(typeof window !== 'undefined' ? window : globalThis));