From 891ce4b0d27918a8bfffdfef4e39e92272f39d92 Mon Sep 17 00:00:00 2001 From: gnosygnu Date: Wed, 29 Mar 2017 21:22:25 -0400 Subject: [PATCH] v4.5.0.1703: app_update --- admin/app_update/xoa_update.json | 2 +- demo/wikimedia.html | 75 ------- demo/wikimedia.js | 329 ------------------------------- 3 files changed, 1 insertion(+), 405 deletions(-) delete mode 100644 demo/wikimedia.html delete mode 100644 demo/wikimedia.js diff --git a/admin/app_update/xoa_update.json b/admin/app_update/xoa_update.json index 738ec2416..bbe8fa254 100644 --- a/admin/app_update/xoa_update.json +++ b/admin/app_update/xoa_update.json @@ -1 +1 @@ -{"version_id":"520"} \ No newline at end of file +{"version_id":"521"} \ No newline at end of file diff --git a/demo/wikimedia.html b/demo/wikimedia.html deleted file mode 100644 index 1ee146abd..000000000 --- a/demo/wikimedia.html +++ /dev/null @@ -1,75 +0,0 @@ - - - - - - - - Example urls: - - - \ No newline at end of file diff --git a/demo/wikimedia.js b/demo/wikimedia.js deleted file mode 100644 index 673a91cc0..000000000 --- a/demo/wikimedia.js +++ /dev/null @@ -1,329 +0,0 @@ -/* - Given a domain and a category, list its member pages and readability score - EX: http://xowa.org/wikimedia.html?domain=en.wikipedia.org&category=Earth -*/ -(function (wm) { - wm.category = new function() { - // ********************************************** - // member variables - // ********************************************** - // test mode - this.production = true; - - // wikimedia domain; EX: en.wikipedia.org - this.domain = 'en.wikipedia.org'; - - // array of pages - this.pages = []; - - // number of pages in category - this.pagesTotal = 0; - - // number of excerpts found - this.excerptsFound = 0; - - // maximum number of excerpts to find - this.excerptsMax = 50; - - // member variable for category - this.category_title = ''; - - // ********************************************** - // main entry function - // ********************************************** - this.run = function() { - setTimeout(function() { - // parse url to get domain and page - var url = window.location.href; - var domain = wm.category.getQueryArg(url, 'domain'); - wm.category.category_title = wm.category.getQueryArg(url, 'category'); - - // use domain arg if available; otherwise use default - if (domain) - wm.category.domain = domain; - - // handle bare url - if (!wm.category.category_title) - return; - - // write status - wm.category.writeHtml('
Evaluating Category:' + wm.category.category_title + '. Please wait...
'); - - // find pages in category - wm.category.findPagesInCategory(wm.category.domain, wm.category.category_title); - }, 100); - } - - this.getQueryArg = function(url, name) { - // REF: http://stackoverflow.com/questions/901115/how-can-i-get-query-string-values-in-javascript - if (!url) { - url = window.location.href; - } - name = name.replace(/[\[\]]/g, "\\$&"); - var regex = new RegExp("[?&]" + name + "(=([^&#]*)|&|#|$)"), - pages = regex.exec(url); - if (!pages) return null; - if (!pages[2]) return ''; - return decodeURIComponent(pages[2].replace(/\+/g, " ")); - } - - // ********************************************** - // find page in category - // ********************************************** - this.findPagesInCategory = function(domain, category) { - // run ajax; NOTE: must specify origin to bypass CORS; http://stackoverflow.com/a/38921370 - if (wm.category.production) { - var url = 'https://' + domain + '/w/api.php?action=query&format=json&formatversion=2&origin=*&list=categorymembers&cmlimit=' + wm.category.excerptsMax + '&cmtitle=Category:' + category; - wm.category.runAjax(url, wm.category.findPagesInCategoryCallback); - } - else { - // var root = {"query":{"categorymembers":[{"pageid":9228,"ns":0,"title":"Earth"},{"pageid":51506837,"ns":0,"title":"Outline of Earth"},{"pageid":25287133,"ns":0,"title":"Anywhere on Earth"},{"pageid":174069,"ns":0,"title":"Asteroid impact avoidance"},{"pageid":35971482,"ns":0,"title":"Day length fluctuations"},{"pageid":33256286,"ns":0,"title":"Demographics of the world"},{"pageid":19509955,"ns":0,"title":"Earth in culture"},{"pageid":212485,"ns":0,"title":"Earth religion"},{"pageid":944638,"ns":0,"title":"Earth's energy budget"},{"pageid":41077022,"ns":0,"title":"Earth's internal heat budget"}]}}; - var root = {"query":{"categorymembers":[{"pageid":9228,"ns":0,"title":"Earth"}]}}; - // var root = {"query":{"categorymembers":[]}}; - wm.category.findPagesInCategoryCallbackRoot(root); - } - } - - this.findPagesInCategoryCallback = function() { - if (this.readyState != 4 || this.status != 200) return; - wm.category.findPagesInCategoryCallbackRoot(JSON.parse(this.responseText)); - } - - this.findPagesInCategoryCallbackRoot = function(root) { - // loop each page in category - var categorymembers = root.query.categorymembers; - for (var categoryIndex in categorymembers) { - // get category - var category = categorymembers[categoryIndex]; - - // get member vars - var page_id = category.pageid; - var ns = category.ns; - var title = category.title; - - // populate local pages table - wm.category.pages[page_id] = category; - - // increment total - wm.category.pagesTotal++; - - // assign score - category.score = 'N/A'; - } - - if (wm.category.pagesTotal == 0) { - wm.category.writeHtml("No results found for Category:" + wm.category.category_title); - } - else { - // get excerpts - wm.category.getExcerpts(); - } - } - - // ********************************************** - // get excerpts - // ********************************************** - this.getExcerpts = function() { - // loop each page to get excerpt - var excerptsCount = 0; - for (var page_id in wm.category.pages) { - var category = wm.category.pages[page_id]; - - // exit if too many - if (excerptsCount++ >= wm.category.excerptsMax) { - // NOTE: must update pagesTotal - wm.category.pagesTotal = wm.category.excerptsMax; - break; - } - - // run ajax; NOTE: must specify origin to bypass CORS; http://stackoverflow.com/a/38921370 - if (wm.category.production) { - var url = 'https://' + wm.category.domain + '/w/api.php?action=query&format=json&formatversion=2&origin=*&prop=extracts&exintro=1&explaintext&titles=' + category.title; - wm.category.runAjax(url, wm.category.getExcerptCallback); - } - else { - var root = {"query":{"pages": - [ - {"pageid":9228,"ns":0,"title":"Earth","extract":"Earth (Greek: Γαῖα Gaia; Latin: Terra)."} - ]}}; - wm.category.getExcerptCallbackRoot(root); - } - } - } - this.getExcerptCallback = function() { - if (this.readyState != 4 || this.status != 200) return; - - var root = JSON.parse(this.responseText); - wm.category.getExcerptCallbackRoot(root); - } - this.getExcerptCallbackRoot = function(root) { - // get variables - var page = root.query.pages[0]; // only 1 page per api call - var page_id = page.pageid; - var excerpt = page.extract; - - // calc readability score - var score = wm.category.calcReadabilityScore(page.title, excerpt); - - // update local category - var category = wm.category.pages[page_id]; - category.excerpt = excerpt; - category.score = score[0]; - category.totalSentences = score[1]; - category.totalWords = score[2]; - category.totalSyllables = score[3]; - if (!category.score) - console.log(JSON.stringify(category)); - - // if last category, print all - if (++wm.category.excerptsFound == wm.category.pagesTotal) { - wm.category.printResults(); - } - } - - // ********************************************** - // calc readability - // ********************************************** - this.calcReadabilityScore = function(title, s) { - // REF: https://en.wikipedia.org/wiki/Flesch–Kincaid_readability_tests - - // count words and sentences - var words = wm.category.toWordArray(s); - if (words.length == 1) return [999, 0, 0, 0]; - var totalWords = words.length; - var totalSentences = wm.category.countSentences(s); - - // count syllables - var totalSyllables = 0; - var wordsLength = words.length; - for (var i = 0; i < wordsLength; i++) { - totalSyllables += wm.category.countSyllablesInWord(words[i]); - } - - // calc score: again, see https://en.wikipedia.org/wiki/Flesch–Kincaid_readability_tests - var score = 206.835 - (1.015 * (totalWords / totalSentences)) - (84.6 * (totalSyllables / totalWords)); - return [score, totalSentences, totalWords, totalSyllables]; - } - - this.toWordArray = function(s){ - // REF: http://stackoverflow.com/a/18679657 - s = s.replace(/(^\s*)|(\s*$)/gi,"");//exclude start and end white-space - s = s.replace(/[ ]{2,}/gi," ");//2 or more space to 1 - s = s.replace(/\n /,"\n"); // exclude newline with a start spacing - return s.split(' '); - } - - this.countSentences = function(s) { - // REF: http://stackoverflow.com/questions/35215348/count-sentences-in-string-with-javascript - var replaced = s.replace(/\w[.?!](\s|$)/g, "$1|"); - var arr = replaced.split("|"); - var arr_len = arr.length; - var count = 0; - for (var i = 0; i < arr_len; i++) { - var sentence = arr[i]; - sentence = sentence.trim(); // remove any whitespace - // ignore 0 length sentences; note that "Yes." will become ["Yes", ""] so 2nd needs to be ignored - if (sentence.length == 0) continue; - - // add back acronyms; 5 is a heuristic for maximum length of acronym - if (sentence.length < 5) { - // ignore; NOTE: not handling "Words U.S.A." will break up into ["Words U", "S", "A"]; - } - else { - count++; - } - } - return count; - } - - this.countSyllablesInWord = function(word) { - // REF: http://stackoverflow.com/questions/5686483/how-to-compute-number-of-syllables-in-a-word-in-javascript - word = word.toLowerCase(); //word.downcase! - if(word.length <= 3) {return 1;} //return 1 if word.length <= 3 - word = word.replace(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, ''); //word.sub!(/(?:[^laeiouy]es|ed|[^laeiouy]e)$/, '') - if (word == null) return 1; - - word = word.replace(/^y/, ''); //word.sub!(/^y/, '') - if (word == null) return 1; - - word = word.match(/[aeiouy]{1,2}/g); //word.scan(/[aeiouy]{1,2}/).size - return word == null ? 1 : word.length; - } - - // ********************************************** - // printResults - // ********************************************** - this.printResults = function() { - // sort results by score - wm.category.pages.sort(wm.category.compareResult); - - // generate string - var s - = '
\n' - + '
Readability scores for member pages of ' + wm.category.buildWikiLink(wm.category.domain, 'Category:' + wm.category.category_title) + ' in ' + wm.category.domain + '
\n' - + '
\n' - + '
\n' - + '
\n' - + '
\n' - + '
Title
\n' - + '
Score
\n' - + '
' - ; - for (var page_id in wm.category.pages) { - var category = wm.category.pages[page_id]; - - // get category_title for url - var page_enc = category.title.replace(/ /g, '_'); - page_enc = encodeURI(page_enc); - - // get score - var score = category.score; - if (score === 999) { - score = 'N/A'; - } - else { - score = score.toFixed(2); - } - - s += '
\n' - + '
' + wm.category.buildWikiLink(wm.category.domain, category.title) + '\n' - + ' \n' - + ' Sentences: ' + category.totalSentences + '
\n' - + ' Words: ' + category.totalWords + '
\n' - + ' Syllables: ' + category.totalSyllables + '
\n' - + '
\n' - + category.excerpt - + '
\n' - + '
\n' - + '
' + score + '
\n' - + '
\n'; - } - s += '
'; - - // print string - wm.category.writeHtml(s); - } - this.buildWikiLink = function(wiki, page) { - var page_enc = page.replace(/ /g, '_'); - page_enc = encodeURI(page_enc); - return '' + page + '' - } - this.compareResult = function(lhs, rhs) { - // sort from least readable to most readable - return (lhs.score - rhs.score); - } - - // ********************************************** - // utility - // ********************************************** - this.runAjax = function(url, callback) { - var xhr = new XMLHttpRequest(); - xhr.open("GET", url, true); - xhr.onreadystatechange = callback; - xhr.send(); - } - this.writeHtml = function(html) { - document.body.innerHTML = html; - } - } -}(window.wm = window.wm || {}));