Строка 1: |
Строка 1: |
| + | |
| + | local coins = {}; |
| + | |
| + | |
| --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- | | --[[--------------------------< F O R W A R D D E C L A R A T I O N S >-------------------------------------- |
| ]] | | ]] |
| + | local is_set, in_array, remove_wiki_link; -- functions in Module:Citation/CS1/Utilities |
| + | |
| + | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration |
| + | |
| + | |
| + | --[[--------------------------< S T R I P _ A P O S T R O P H E _ M A R K U P >-------------------------------- |
| | | |
− | local has_accept_as_written, is_set, in_array, remove_wiki_link, strip_apostrophe_markup; -- functions in Module:Citation/CS1/Utilities
| + | Strip wiki italic and bold markup from argument so that it doesn't contaminate COinS metadata. |
| + | This function strips common patterns of apostrophe markup. We presume that editors who have taken the time to |
| + | markup a title have, as a result, provided valid markup. When they don't, some single apostrophes are left behind. |
| | | |
− | local cfg; -- table of configuration tables that are defined in Module:Citation/CS1/Configuration | + | ]] |
| + | |
local function strip_apostrophe_markup (argument)
	-- Removes wiki bold/italic apostrophe markup from argument so that it does
	-- not leak into the COinS metadata.  A value that is_set() reports as not
	-- set is handed back untouched.
	if not is_set (argument) then
		return argument;
	end

	-- Apostrophe runs, longest first: bold italic (5), italic start and end
	-- without content (4), bold (3), italic (2).  An apostrophe is not a magic
	-- pattern character, so each run also serves as its own gsub pattern.
	local runs = {"'''''", "''''", "'''", "''"};

	local stripped_something;
	repeat
		stripped_something = false;
		for _, run in ipairs (runs) do
			if argument:find (run, 1, true) then					-- plain search for the longest run still present
				argument = argument:gsub (run, "");				-- remove every instance of it, then rescan from the longest run
				stripped_something = true;
				break;
			end
		end
	until not stripped_something;								-- nothing left that contains a double apostrophe

	return argument;											-- single apostrophes, if any, are left behind
end
| | | |
| | | |
Строка 17: |
Строка 52: |
| | | |
| local function make_coins_title (title, script) | | local function make_coins_title (title, script) |
− | title = has_accept_as_written (title);
| |
| if is_set (title) then | | if is_set (title) then |
| title = strip_apostrophe_markup (title); -- strip any apostrophe markup | | title = strip_apostrophe_markup (title); -- strip any apostrophe markup |
| else | | else |
− | title = ''; -- if not set, make sure title is an empty string | + | title=''; -- if not set, make sure title is an empty string |
| end | | end |
| if is_set (script) then | | if is_set (script) then |
Строка 27: |
Строка 61: |
| script = strip_apostrophe_markup (script); -- strip any apostrophe markup | | script = strip_apostrophe_markup (script); -- strip any apostrophe markup |
| else | | else |
− | script = ''; -- if not set, make sure script is an empty string | + | script=''; -- if not set, make sure script is an empty string |
| end | | end |
| if is_set (title) and is_set (script) then | | if is_set (title) and is_set (script) then |
Строка 38: |
Строка 72: |
| --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- | | --[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >---------------------------------- |
| | | |
− | Returns a string where all of Lua's magic characters have been escaped. This is important because functions like | + | Returns a string where all of lua's magic characters have been escaped. This is important because functions like |
| string.gsub() treat their pattern and replace strings as patterns, not literal strings. | | string.gsub() treat their pattern and replace strings as patterns, not literal strings. |
| ]] | | ]] |
Строка 44: |
Строка 78: |
local function escape_lua_magic_chars (argument)
	-- Prefix every Lua pattern magic character, the percent sign included,
	-- with '%' so that the returned string matches itself literally when it
	-- is later used as a pattern.  In the replacement string '%%' is a
	-- literal percent sign and '%0' is the matched character.
	local escaped = argument:gsub ("[%%%^%$%(%)%.%[%]%*%+%-%?]", "%%%0");
	return escaped;												-- only the string; gsub's replacement count is dropped
end
Строка 60: |
Строка 94: |
| | | |
| while true do | | while true do |
− | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the URL and following space(s): "[url " | + | pattern = pages:match("%[(%w*:?//[^ ]+%s+)[%w%d].*%]"); -- pattern is the opening bracket, the url and following space(s): "[url " |
− | if nil == pattern then break; end -- no more URLs | + | if nil == pattern then break; end -- no more urls |
− | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape Lua's magic pattern characters | + | pattern = escape_lua_magic_chars (pattern); -- pattern is not a literal string; escape lua's magic pattern characters |
| pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible | | pages = pages:gsub(pattern, ""); -- remove as many instances of pattern as possible |
| end | | end |
| pages = pages:gsub("[%[%]]", ""); -- remove the brackets | | pages = pages:gsub("[%[%]]", ""); -- remove the brackets |
− | pages = pages:gsub("–", "-" ); -- replace endashes with hyphens | + | pages = pages:gsub("–", "-" ); -- replace endashes with hyphens |
− | pages = pages:gsub("&%w+;", "-" ); -- and replace HTML entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like? | + | pages = pages:gsub("&%w+;", "-" ); -- and replace html entities (– etc.) with hyphens; do we need to replace numerical entities like   and the like? |
| return pages; | | return pages; |
| end | | end |
Строка 80: |
Строка 114: |
| MathML with SVG or PNG fallback | | MathML with SVG or PNG fallback |
| | | |
− | All three are heavy with HTML and CSS which doesn't belong in the metadata. | + | All three are heavy with html and css which doesn't belong in the metadata. |
| | | |
| Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings | | Without this function, the metadata saved in the raw wikitext contained the rendering determined by the settings |
Строка 87: |
Строка 121: |
| This function gets the rendered form of an equation according to the editor's preference before the page is saved. It | | This function gets the rendered form of an equation according to the editor's preference before the page is saved. It |
| then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so | | then searches the rendering for the text equivalent of the rendered equation and replaces the rendering with that so |
− | that the page is saved without extraneous HTML/CSS markup and with a reasonably readable text form of the equation. | + | that the page is saved without extraneous html/css markup and with a reasonably readable text form of the equation. |
| | | |
− | When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial | + | When a replacement is made, this function returns true and the value with replacement; otherwise false and the initial |
− | value. To replace multipe equations it is necessary to call this function from within a loop. | + | value. To replace multiple equations it is necessary to call this function from within a loop. |
| | | |
| ]=] | | ]=] |
Строка 120: |
Строка 154: |
| --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- | | --[[--------------------------< C O I N S _ C L E A N U P >---------------------------------------------------- |
| | | |
− | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain HTML entities. | + | Cleanup parameter values for the metadata by removing or replacing invisible characters and certain html entities. |
| | | |
| 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content | | 2015-12-10: there is a bug in mw.text.unstripNoWiki (). It replaces math stripmarkers with the appropriate content |
Строка 137: |
Строка 171: |
| end | | end |
| | | |
− | value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message | + | value = value:gsub (cfg.stripmarkers['math'], "MATH RENDER ERROR"); -- one or more couldn't be replaced; insert vague error message |
| | | |
| value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content | | value = mw.text.unstripNoWiki (value); -- replace nowiki stripmarkers with their content |
Строка 143: |
Строка 177: |
| value = value:gsub (' ', ' '); -- replace entity with plain space | | value = value:gsub (' ', ' '); -- replace entity with plain space |
| value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space | | value = value:gsub ('\226\128\138', ' '); -- replace hair space with plain space |
− | if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero-width joiner characters from indic script | + | if not mw.ustring.find (value, cfg.indic_script) then -- don't remove zero width joiner characters from indic script |
− | value = value:gsub ('‍', ''); -- remove ‍ entities | + | value = value:gsub ('‍', ''); -- remove ‍ entities |
| value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen | | value = mw.ustring.gsub (value, '[\226\128\141\226\128\139\194\173]', ''); -- remove zero-width joiner, zero-width space, soft hyphen |
| end | | end |
− | value = value:gsub ('[\009\010\013 ]+', ' '); -- replace horizontal tab, line feed, carriage return with plain space | + | value = value:gsub ('[\009\010\013]', ' '); -- replace horizontal tab, line feed, carriage return with plain space |
| return value; | | return value; |
| end | | end |
Строка 180: |
Строка 214: |
| }); | | }); |
| | | |
− | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn', 'journal', 'news', 'magazine'}) or | + | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'journal', 'news', 'magazine'}) or (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or |
− | (in_array (class, {'conference', 'interview', 'map', 'press release', 'web'}) and is_set(data.Periodical)) or
| |
| ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then | | ('citation' == class and is_set(data.Periodical) and not is_set (data.Encyclopedia)) then |
| OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier | | OCinSoutput.rft_val_fmt = "info:ofi/fmt:kev:mtx:journal"; -- journal metadata identifier |
− | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx', 'medrxiv', 'ssrn'}) then -- set genre according to the type of citation template we are rendering | + | if in_array (class, {'arxiv', 'biorxiv', 'citeseerx'}) then -- set genre according to the type of citation template we are rendering |
− | OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx, cite medrxiv, cite ssrn | + | OCinSoutput["rft.genre"] = "preprint"; -- cite arxiv, cite biorxiv, cite citeseerx |
| elseif 'conference' == class then | | elseif 'conference' == class then |
| OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set) | | OCinSoutput["rft.genre"] = "conference"; -- cite conference (when Periodical set) |
Строка 198: |
Строка 231: |
| -- these used only for periodicals | | -- these used only for periodicals |
| OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall | | OCinSoutput["rft.ssn"] = data.Season; -- keywords: winter, spring, summer, fall |
− | OCinSoutput["rft.quarter"] = data.Quarter; -- single digits 1->first quarter, etc.
| |
| OCinSoutput["rft.chron"] = data.Chron; -- free-form date components | | OCinSoutput["rft.chron"] = data.Chron; -- free-form date components |
| OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books | | OCinSoutput["rft.volume"] = data.Volume; -- does not apply to books |
| OCinSoutput["rft.issue"] = data.Issue; | | OCinSoutput["rft.issue"] = data.Issue; |
− | OCinSoutput['rft.artnum'] = data.ArticleNumber; -- {{cite journal}} only
| |
| OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata | | OCinSoutput["rft.pages"] = data.Pages; -- also used in book metadata |
| | | |
Строка 223: |
Строка 254: |
| end | | end |
| end | | end |
− | else -- {'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} | + | else --{'audio-visual', 'AV-media-notes', 'DVD-notes', 'episode', 'interview', 'mailinglist', 'map', 'newsgroup', 'podcast', 'press release', 'serial', 'sign', 'speech', 'web'} |
| OCinSoutput["rft.genre"] = "unknown"; | | OCinSoutput["rft.genre"] = "unknown"; |
| end | | end |
Строка 239: |
Строка 270: |
| OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation | | OCinSoutput['rft.inst'] = data.PublisherName; -- book and dissertation |
| end | | end |
− | -- NB. Not currently supported are "info:ofi/fmt:kev:mtx:patent", "info:ofi/fmt:kev:mtx:dc", "info:ofi/fmt:kev:mtx:sch_svc", "info:ofi/fmt:kev:mtx:ctx"
| |
| -- and now common parameters (as much as possible) | | -- and now common parameters (as much as possible) |
| OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation | | OCinSoutput["rft.date"] = data.Date; -- book, journal, dissertation |
− | | + | |
| for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? | | for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? |
| + | -- if k == 'ISBN' then v = clean_isbn( v ) end |
| if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end | | if k == 'ISBN' then v = v:gsub( "[^-0-9X]", "" ); end |
| local id = cfg.id_handlers[k].COinS; | | local id = cfg.id_handlers[k].COinS; |
| if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry | | if string.sub( id or "", 1, 4 ) == 'info' then -- for ids that are in the info:registry |
| OCinSoutput["rft_id"] = table.concat{ id, "/", v }; | | OCinSoutput["rft_id"] = table.concat{ id, "/", v }; |
− | elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc. that have defined COinS keywords | + | elseif string.sub (id or "", 1, 3 ) == 'rft' then -- for isbn, issn, eissn, etc that have defined COinS keywords |
| OCinSoutput[ id ] = v; | | OCinSoutput[ id ] = v; |
− | elseif 'url' == id then -- for urls that are assembled in ~/Identifiers; |asin= and |ol=
| + | elseif id then -- when cfg.id_handlers[k].COinS is not nil |
− | OCinSoutput["rft_id"] = table.concat ({data.ID_list[k], "#id-name=", cfg.id_handlers[k].label});
| + | OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v }; -- others; provide a url |
− | elseif id then -- when cfg.id_handlers[k].COinS is not nil so urls created here | |
− | OCinSoutput["rft_id"] = table.concat{ cfg.id_handlers[k].prefix, v, cfg.id_handlers[k].suffix or '', "#id-name=", cfg.id_handlers[k].label }; -- others; provide a URL and indicate identifier name as #fragment (human-readable, but transparent to browsers) | |
| end | | end |
| end | | end |
| | | |
| + | --[[ |
| + | for k, v in pairs( data.ID_list ) do -- what to do about these? For now assume that they are common to all? |
| + | local id, value = cfg.id_handlers[k].COinS; |
| + | if k == 'ISBN' then value = clean_isbn( v ); else value = v; end |
| + | if string.sub( id or "", 1, 4 ) == 'info' then |
| + | OCinSoutput["rft_id"] = table.concat{ id, "/", v }; |
| + | else |
| + | OCinSoutput[ id ] = value; |
| + | end |
| + | end |
| + | ]] |
| local last, first; | | local last, first; |
| for k, v in ipairs( data.Authors ) do | | for k, v in ipairs( data.Authors ) do |
− | last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki stripmarkers, non-printing or invisible characters | + | last, first = coins_cleanup (v.last), coins_cleanup (v.first or ''); -- replace any nowiki strip markers, non-printing or invisible characers |
| if k == 1 then -- for the first author name only | | if k == 1 then -- for the first author name only |
− | if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name | + | if is_set(last) and is_set(first) then -- set these COinS values if |first= and |last= specify the first author name |
| OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation | | OCinSoutput["rft.aulast"] = last; -- book, journal, dissertation |
| OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation | | OCinSoutput["rft.aufirst"] = first; -- book, journal, dissertation |
Строка 273: |
Строка 313: |
| OCinSoutput["rft.au"] = last; -- book, journal, dissertation | | OCinSoutput["rft.au"] = last; -- book, journal, dissertation |
| end | | end |
− | -- TODO: At present we do not report "et al.". Add anything special if this condition applies?
| |
| end | | end |
| end | | end |
Строка 279: |
Строка 318: |
| OCinSoutput.rft_id = data.URL; | | OCinSoutput.rft_id = data.URL; |
| OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; | | OCinSoutput.rfr_id = table.concat{ "info:sid/", mw.site.server:match( "[^/]*$" ), ":", data.RawPage }; |
− | | + | OCinSoutput = setmetatable( OCinSoutput, nil ); |
− | -- TODO: Add optional extra info: | |
− | -- rfr_dat=#REVISION<version> (referrer private data)
| |
− | -- ctx_id=<data.RawPage>#<ref> (identifier for the context object)
| |
− | -- ctx_tim=<ts> (timestamp in format yyyy-mm-ddThh:mm:ssTZD or yyyy-mm-dd)
| |
− | -- ctx_enc=info:ofi/enc:UTF-8 (character encoding)
| |
| | | |
− | OCinSoutput = setmetatable( OCinSoutput, nil );
| |
− |
| |
| -- sort with version string always first, and combine. | | -- sort with version string always first, and combine. |
− | -- table.sort( OCinSoutput ); | + | --table.sort( OCinSoutput ); |
− | table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" | + | table.insert( OCinSoutput, 1, "ctx_ver=" .. ctx_ver ); -- such as "Z39.88-2004" |
| return table.concat(OCinSoutput, "&"); | | return table.concat(OCinSoutput, "&"); |
| end | | end |
Строка 304: |
Строка 336: |
| cfg = cfg_table_ptr; | | cfg = cfg_table_ptr; |
| | | |
− | has_accept_as_written = utilities_page_ptr.has_accept_as_written; -- import functions from selected Module:Citation/CS1/Utilities module | + | is_set = utilities_page_ptr.is_set; -- import functions from select Module:Citation/CS1/Utilities module |
− | is_set = utilities_page_ptr.is_set;
| |
| in_array = utilities_page_ptr.in_array; | | in_array = utilities_page_ptr.in_array; |
| remove_wiki_link = utilities_page_ptr.remove_wiki_link; | | remove_wiki_link = utilities_page_ptr.remove_wiki_link; |
− | strip_apostrophe_markup = utilities_page_ptr.strip_apostrophe_markup;
| |
| end | | end |
| | | |
| | | |
− | --[[--------------------------< E X P O R T E D F U N C T I O N S >------------------------------------------
| |
− | ]]
| |
| | | |
| return { | | return { |