This is a report on Wikimedia Maps usage across Wikimedia Projects.
According to the MediaWiki Maps page, the following wikis have Maps enabled with mapframes (maplinks are enabled on all wikis):
Let’s count how many articles on those wikis have mapframes. Some articles have more than one mapframe, so we also counted the total number of mapframes.
# dbs is a list of databases in analytics-store
# Databases to query for mapframes: every database whose name ends in
# "voyage", a fixed set of Wikipedias, and three other projects.
wikivoyages <- grep("voyage$", dbs, value = TRUE)
wikipedias <- c(
  "cawiki", "hewiki", "ruwiki", "mkwiki", "frwiki", "fiwiki",
  "nowiki", "svwiki", "ptwiki", "cswiki", "euwiki"
)
other_projects <- c("metawiki", "mediawikiwiki", "uawikimedia")
mapframe_wikis <- c(wikivoyages, wikipedias, other_projects)

# One-row summary per wiki over non-redirect pages in namespace 0: number of
# pages, pages with at least one mapframe, total mapframes, and the share of
# pages with a mapframe. The RIGHT JOIN keeps pages that have no
# 'kartographer_frames' property so that they still count toward the totals.
query <- "SELECT
COUNT(*) AS `total articles`,
SUM(IF(mapframes > 0, 1, 0)) AS `articles with a mapframe`,
SUM(COALESCE(mapframes, 0)) AS `total mapframes`,
SUM(IF(mapframes > 0, 1, 0))/COUNT(*) AS `mapframe prevalence`
FROM (
SELECT
p.page_id,
pp_value AS mapframes
FROM (
SELECT pp_page, pp_value
FROM page_props
WHERE pp_propname = 'kartographer_frames' AND pp_value > 0
) AS filtered_props
RIGHT JOIN (
SELECT page_id FROM page
WHERE page_namespace = 0 AND page_is_redirect = 0
) p
ON p.page_id = filtered_props.pp_page
) joined_tables;"

# Run the summary query against each wiki through the local SSH tunnel
# (127.0.0.1:3307) and collect one result per database.
mapframes <- lapply(mapframe_wikis, function(db) {
  message("Fetching mapframe statistics from ", db, "...")
  con <- dbConnect(MySQL(), host = "127.0.0.1", group = "client", dbname = db, port = 3307)
  # Close the connection even if the query fails, so that an error on one
  # wiki does not leave its connection open.
  on.exit(dbDisconnect(con), add = TRUE)
  suppressWarnings(wmf::mysql_read(query, db, con = con))
})
mapframes <- dplyr::bind_rows(mapframes)
# NOTE(review): language_projects is defined outside this chunk; this assumes
# every database queried above is one of its names -- verify, since a missing
# name would produce an NA row name.
rownames(mapframes) <- language_projects[mapframe_wikis]
Below are the results as of 11 September 2017:
# Interactive table of the per-wiki mapframe statistics: 10 rows per page,
# column filters on top, copy/CSV export buttons, and an initial descending
# sort on column index 4.
mapframe_table_options <- list(
  pageLength = 10, autoWidth = TRUE, language = list(search = "Filter:"),
  order = list(list(4, "desc")), dom = "Bfrtip", buttons = c("copy", "csv")
)
mapframe_table <- DT::datatable(
  mapframes,
  caption = "This shows the prevalence of mapframes on wikis that have it enabled.",
  filter = "top",
  extensions = "Buttons",
  options = mapframe_table_options
)
# Show the prevalence as a percentage with 3 decimal places.
mapframe_table <- DT::formatPercentage(mapframe_table, "mapframe prevalence", 3)
# Show the counts as whole numbers with no currency symbol.
DT::formatCurrency(
  mapframe_table,
  columns = c("total articles", "articles with a mapframe", "total mapframes"),
  currency = "", digits = 0
)
The mean prevalence across mapframe-enabled wikis is 6.98%. The median prevalence across mapframe-enabled wikis is 0.04%. Overall prevalence is 1.76%.
Let’s count how many articles across Wikipedia, Wikivoyage, Meta, MediaWiki, and Wikispecies have maplinks. Some articles have more than one maplink, so we also counted the total number of maplinks.
# dbs is a list of databases in analytics-store
# Start from every database whose name ends in "wiki", drop those whose names
# start with wikimania, test, login, transition, or arbcom, add the
# mapframe-enabled wikis and Wikispecies, and keep only databases that appear
# among the names of language_projects.
wikis <- grep("wiki$", dbs, value = TRUE)
wikis <- wikis[!grepl("^((wikimania)|(test)|(login)|(transition)|(arbcom))", wikis)]
wikis <- Reduce(union, list(wikis, wikivoyages, wikipedias, other_projects, "specieswiki"))
wikis <- wikis[wikis %in% names(language_projects)]

# One-row summary per wiki over non-redirect pages in namespace 0: number of
# pages, pages with at least one maplink, total maplinks, and the share of
# pages with a maplink. The RIGHT JOIN keeps pages that have no
# 'kartographer_links' property so that they still count toward the totals.
query <- "SELECT
COUNT(*) AS `total articles`,
SUM(IF(maplinks > 0, 1, 0)) AS `articles with a maplink`,
SUM(COALESCE(maplinks, 0)) AS `total maplinks`,
SUM(IF(maplinks > 0, 1, 0))/COUNT(*) AS `maplink prevalence`
FROM (
SELECT
p.page_id,
pp_value AS maplinks
FROM (
SELECT pp_page, pp_value
FROM page_props
WHERE pp_propname = 'kartographer_links' AND pp_value > 0
) AS filtered_props
RIGHT JOIN (
SELECT page_id FROM page
WHERE page_namespace = 0 AND page_is_redirect = 0
) p
ON p.page_id = filtered_props.pp_page
) joined_tables;"

# Run the summary query against each wiki through the local SSH tunnel
# (127.0.0.1:3307) and collect one result per database.
maplinks <- lapply(wikis, function(db) {
  message("Fetching maplink statistics from ", db, "...")
  con <- dbConnect(MySQL(), host = "127.0.0.1", group = "client", dbname = db, port = 3307)
  # Close the connection even if the query fails, so that an error on one
  # wiki does not leave its connection open.
  on.exit(dbDisconnect(con), add = TRUE)
  suppressWarnings(wmf::mysql_read(query, db, con = con))
})
maplinks <- dplyr::bind_rows(maplinks)
# Label each row with the display name that language_projects holds for the
# database (wikis was filtered to names(language_projects) above).
rownames(maplinks) <- language_projects[wikis]
Below are the results as of 11 September 2017:
# Interactive table of the per-wiki maplink statistics: 10 rows per page,
# column filters on top, copy/CSV export buttons, and an initial descending
# sort on column index 4.
maplink_table_options <- list(
  pageLength = 10, autoWidth = TRUE, language = list(search = "Filter:"),
  order = list(list(4, "desc")), dom = "Bfrtip", buttons = c("copy", "csv")
)
maplink_table <- DT::datatable(
  maplinks,
  caption = "This shows the prevalence of maplinks on wikis.",
  filter = "top",
  extensions = "Buttons",
  options = maplink_table_options
)
# Show the prevalence as a percentage with 3 decimal places.
maplink_table <- DT::formatPercentage(maplink_table, "maplink prevalence", 3)
# Show the counts as whole numbers with no currency symbol.
DT::formatCurrency(
  maplink_table,
  columns = c("total articles", "articles with a maplink", "total maplinks"),
  currency = "", digits = 0
)
The average prevalence across wikis is 1.52%. The median prevalence across wikis is 0.00%. Overall prevalence is 1.23%.
# Renders an interactive table for a subset of the maplink statistics.
#
# stats:   rows of `maplinks` to display (row names identify the wiki).
# caption: caption shown with the table.
#
# Returns the DT widget with the prevalence shown as a percentage with 3
# decimal places and the counts shown as whole numbers with no currency
# symbol. The three tables below previously repeated this call verbatim and
# differed only in the rows selected and the caption.
maplink_prevalence_table <- function(stats, caption) {
  DT::datatable(
    stats,
    caption = caption,
    filter = "top",
    extensions = "Buttons",
    options = list(
      pageLength = 10, autoWidth = TRUE, language = list(search = "Filter:"),
      order = list(list(4, "desc")), dom = "Bfrtip", buttons = c("copy", "csv")
    )
  ) %>%
    DT::formatPercentage("maplink prevalence", 3) %>%
    DT::formatCurrency(
      columns = c("total articles", "articles with a maplink", "total maplinks"),
      currency = "", digits = 0
    )
}

# Wikipedias only.
maplink_prevalence_table(
  maplinks[grepl("Wikipedia$", rownames(maplinks)), ],
  caption = "This shows the prevalence of maplinks on Wikipedia."
)

# Wikivoyages only.
maplink_prevalence_table(
  maplinks[grepl("Wikivoyage$", rownames(maplinks)), ],
  caption = "This shows the prevalence of maplinks on Wikivoyage."
)

# The remaining projects.
maplink_prevalence_table(
  maplinks[rownames(maplinks) %in% c("MediaWiki", "Meta wiki", "Wikispecies"), ],
  caption = "This shows the prevalence of maplinks on MediaWiki, Meta wiki, and Wikispecies."
)
Map data allows users to store GeoJSON data on wiki, similar to images. Search for *.map within Data namespace and you get results like Data:Parramatta Light Rail.map:
Or if you search for *.tab within Data namespace, you’ll get tabular datasets like Data:Bea.gov/GDP by state.tab.
Let’s see how many of those there are:
(Query run on 11 September 2017.)
-- Count pages in namespace 486 by dataset type, determined from the title
-- suffix (".map" or ".tab"); anything else is reported as 'other'.
-- The literal dot is escaped with two backslashes: MySQL's string parser
-- consumes one of them, so '\.map$' would reach the regular-expression
-- engine as '.map$' and match any character followed by "map".
SELECT
  CASE WHEN page_title RLIKE '\\.map$' THEN 'map'
       WHEN page_title RLIKE '\\.tab$' THEN 'tabular'
       ELSE 'other'
  END AS data,
  FORMAT(COUNT(*), 0) AS total
FROM page
WHERE page_namespace = 486
GROUP BY data;
data | total |
---|---|
map | 528 |
tabular | 269 |
Mapframes and maplinks are two ways that Wikimedia Maps can be included on a page when the Kartographer extension is enabled. Maplinks are also added to a page automatically through coordinates, Object Location, and Location templates as part of geocoding on Commons. For example, as of 14 July 2017, File:RNV HEAG-Dampf Schleifenbogen 1.JPG includes a mapframe in its description, and File:Paddestoel 003.jpg includes a camera location, which results in a maplink.
(Query run on 11 September 2017.)
-- Summary over non-redirect pages in namespace 6: number of files, files
-- with at least one mapframe / maplink, their share of all files, and the
-- total number of mapframes / maplinks.
SELECT
  FORMAT(COUNT(*), 0) AS `total files`,
  FORMAT(SUM(IF(mapframes > 0, 1, 0)), 0) AS `files with a mapframe`,
  CONCAT(ROUND(100*SUM(IF(mapframes > 0, 1, 0))/COUNT(*),2),'%') AS `mapframe prevalence`,
  FORMAT(SUM(mapframes), 0) AS `total mapframes`,
  FORMAT(SUM(IF(maplinks > 0, 1, 0)), 0) AS `files with a maplink`,
  CONCAT(ROUND(100*SUM(IF(maplinks > 0, 1, 0))/COUNT(*),2),'%') AS `maplink prevalence`,
  FORMAT(SUM(maplinks), 0) AS `total maplinks`
FROM (
  -- One row per file. The LEFT JOIN keeps files that have no Kartographer
  -- property; for those both sums evaluate to 0.
  SELECT
    files.page_id,
    SUM(IF(props.pp_propname = 'kartographer_frames', props.pp_value, 0)) AS mapframes,
    SUM(IF(props.pp_propname = 'kartographer_links', props.pp_value, 0)) AS maplinks
  FROM page AS files
  LEFT JOIN page_props AS props
    ON props.pp_page = files.page_id
    AND props.pp_propname IN('kartographer_frames', 'kartographer_links')
    AND props.pp_value > 0
  WHERE files.page_namespace = 6
    AND files.page_is_redirect = 0
  GROUP BY files.page_id
) AS per_file;
total files | files with a mapframe | mapframe prevalence | total mapframes | files with a maplink | maplink prevalence | total maplinks |
---|---|---|---|---|---|---|
41,894,008 | 5 | 0.00% | 5 | 9,786,138 | 23.36% | 9,852,870 |
Approximately 23% (~9.8M) of 42M files on Commons include a maplink.
This report was compiled using RMarkdown, knitr, and an open SSH tunnel for connecting to our databases:
# Forward local port 3307 through stat6 to port 3306 on analytics-store; the R code above connects to 127.0.0.1:3307.
ssh -N stat6 -L 3307:analytics-store.eqiad.wmnet:3306
When figuring stuff out (e.g. what it looks like in the database when a page has a map) and working with page IDs, the MediaWiki API can be used to get a page title from a page ID:
https://commons.wikimedia.org/w/api.php?action=query&prop=revisions&rvprop=content&format=jsonfm&pageids=ID1|ID2