import wmfdata
# Build a fully customised SparkSession with wmfdata's create_custom_session.
spark = wmfdata.spark.create_custom_session(
    master='yarn',
    spark_config={
        # 16g because we have many partitions
        "spark.driver.memory": "16g",
        "spark.driver.cores": 2,
        "spark.executor.memory": "4g",
        "spark.executor.cores": 2,
        # Explicit container headroom on top of the 4g heap: without it the
        # executors were killed by YARN ("4.4 GB of 4.4 GB physical memory
        # used"), and the YARN message itself recommends boosting this
        # setting. Raise it further if containers still get killed.
        "spark.executor.memoryOverhead": "1g",
        "spark.sql.shuffle.partitions": 512,
        'spark.dynamicAllocation.maxExecutors': 128,
        'spark.locality.wait': '1s',  # test 0
    }
)
from pyspark.sql.functions import col
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql.window import Window
SPARK_HOME: /usr/lib/spark3 Using Hadoop client lib jars at 3.2.0, provided by Spark. PYSPARK_PYTHON=/opt/conda-analytics/bin/python3
Setting default log level to "WARN". To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel). 23/07/28 15:03:33 WARN SparkConf: Note that spark.local.dir will be overridden by the value set by the cluster manager (via SPARK_LOCAL_DIRS in mesos/standalone/kubernetes and LOCAL_DIRS in YARN). 23/07/28 15:03:33 WARN Utils: Service 'sparkDriver' could not bind on port 12000. Attempting port 12001. 23/07/28 15:03:33 WARN Utils: Service 'sparkDriver' could not bind on port 12001. Attempting port 12002. 23/07/28 15:03:33 WARN Utils: Service 'sparkDriver' could not bind on port 12002. Attempting port 12003. 23/07/28 15:03:33 WARN Utils: Service 'sparkDriver' could not bind on port 12003. Attempting port 12004. 23/07/28 15:03:33 WARN Utils: Service 'sparkDriver' could not bind on port 12004. Attempting port 12005. 23/07/28 15:03:34 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041. 23/07/28 15:03:34 WARN Utils: Service 'SparkUI' could not bind on port 4041. Attempting port 4042. 23/07/28 15:03:34 WARN Utils: Service 'SparkUI' could not bind on port 4042. Attempting port 4043. 23/07/28 15:03:34 WARN Utils: Service 'SparkUI' could not bind on port 4043. Attempting port 4044. 23/07/28 15:03:34 WARN Utils: Service 'SparkUI' could not bind on port 4044. Attempting port 4045. 23/07/28 15:03:46 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13000. Attempting port 13001. 23/07/28 15:03:46 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13001. Attempting port 13002. 23/07/28 15:03:46 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13002. Attempting port 13003. 23/07/28 15:03:46 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13003. Attempting port 13004. 
23/07/28 15:03:46 WARN Utils: Service 'org.apache.spark.network.netty.NettyBlockTransferService' could not bind on port 13004. Attempting port 13005. 23/07/28 15:03:46 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Attempted to request executors before the AM has registered!
# Rows of discovery.wikibase_rdf for wiki "wikidata" and date "20230724",
# aliased as "data" so later joins can select "data.*".
data = (
    spark.table('discovery.wikibase_rdf')
    .where('wiki="wikidata" AND `date` = "20230724"')
    .alias("data")
)
# IRIs (in the angle-bracket form compared against the table's columns)
# used by the queries below.
# Predicate wdt:P31 and entity wd:Q13442814, as used in the SPARQL
# equivalent of the S selection.
wdt_p31 = '<http://www.wikidata.org/prop/direct/P31>'
Q13442814 = '<http://www.wikidata.org/entity/Q13442814>'
# Namespace prefixes of value and reference nodes.
value_prefix = '<http://www.wikidata.org/value/'
ref_prefix = '<http://www.wikidata.org/reference/'
# Wikibase ontology classes for value nodes and reference nodes.
onto_value = '<http://wikiba.se/ontology#Value>'
# Was a copy of onto_value ('#Value'); references are typed wikibase:Reference.
onto_ref = '<http://wikiba.se/ontology#Reference>'
# sparql equivalent: select (?q as ?schol_art_QID) where { ?q wdt:P31 wd:Q13442814 . }
# Filter first, then project: both predicates reference columns
# ("predicate", "object") that the projection below does not keep, so they
# must be applied while those columns are still part of the frame.
S = (
    data
    .filter((col("predicate") == wdt_p31) & (col("object") == Q13442814))
    .select(col("subject").alias("schol_art_QID"))
    .alias("S")
)
# Print a single row without truncating the IRI.
S.show(1, truncate=False)
[Stage 3:> (0 + 1) / 1]
+------------------------------------------+ |subject | +------------------------------------------+ |<http://www.wikidata.org/entity/Q58657074>| +------------------------------------------+ only showing top 1 row
# sparql equivalent: hard
# Collecting all the triples that belong to an entity is not obvious;
# here we rely on the "context" column, which should group triples under
# the entity they belong to -- except, of course, for shared references and
# values, which have to be counted separately.

# Rows of `data` whose context is one of the subjects selected in S.
S_direct_triples = (
    data
    .join(S, on=(S["schol_art_QID"] == data["context"]), how='inner')
    .select("data.*")
    .cache()
)

# Complement: rows of `data` whose context matches no subject in S.
not_S_direct_triples = (
    data
    .join(S, on=(S["schol_art_QID"] == data["context"]), how='leftanti')
    .select("data.*")
    .cache()
)

# Counting forces evaluation of S_direct_triples.
S_direct_triples.count()
23/07/28 15:27:55 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 74 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:27:55 ERROR YarnScheduler: Lost executor 74 on an-worker1143.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:27:55 WARN TaskSetManager: Lost task 2292.0 in stage 4.0 (TID 577) (an-worker1143.eqiad.wmnet executor 74): ExecutorLostFailure (executor 74 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:27:55 WARN TaskSetManager: Lost task 1474.0 in stage 4.0 (TID 369) (an-worker1143.eqiad.wmnet executor 74): ExecutorLostFailure (executor 74 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:03 ERROR YarnScheduler: Lost executor 81 on an-worker1103.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:03 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 81 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:03 WARN TaskSetManager: Lost task 1221.0 in stage 4.0 (TID 891) (an-worker1103.eqiad.wmnet executor 81): ExecutorLostFailure (executor 81 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:03 WARN TaskSetManager: Lost task 1163.0 in stage 4.0 (TID 813) (an-worker1103.eqiad.wmnet executor 81): ExecutorLostFailure (executor 81 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:06 ERROR YarnScheduler: Lost executor 125 on an-worker1086.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:06 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 125 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:06 WARN TaskSetManager: Lost task 1720.0 in stage 4.0 (TID 1000) (an-worker1086.eqiad.wmnet executor 125): ExecutorLostFailure (executor 125 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:06 WARN TaskSetManager: Lost task 1776.0 in stage 4.0 (TID 1023) (an-worker1086.eqiad.wmnet executor 125): ExecutorLostFailure (executor 125 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:07 ERROR YarnScheduler: Lost executor 20 on an-worker1118.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:28:07 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 20 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:07 WARN TaskSetManager: Lost task 1010.0 in stage 4.0 (TID 1078) (an-worker1118.eqiad.wmnet executor 20): ExecutorLostFailure (executor 20 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:07 WARN TaskSetManager: Lost task 1018.0 in stage 4.0 (TID 1134) (an-worker1118.eqiad.wmnet executor 20): ExecutorLostFailure (executor 20 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:15 ERROR YarnScheduler: Lost executor 43 on an-worker1125.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:15 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 43 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:15 WARN TaskSetManager: Lost task 2931.0 in stage 4.0 (TID 1392) (an-worker1125.eqiad.wmnet executor 43): ExecutorLostFailure (executor 43 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:28:15 WARN TaskSetManager: Lost task 2568.0 in stage 4.0 (TID 1296) (an-worker1125.eqiad.wmnet executor 43): ExecutorLostFailure (executor 43 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:22 ERROR YarnScheduler: Lost executor 63 on an-worker1107.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:22 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 63 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:22 WARN TaskSetManager: Lost task 665.0 in stage 4.0 (TID 1381) (an-worker1107.eqiad.wmnet executor 63): ExecutorLostFailure (executor 63 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:22 WARN TaskSetManager: Lost task 1466.0 in stage 4.0 (TID 1809) (an-worker1107.eqiad.wmnet executor 63): ExecutorLostFailure (executor 63 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:24 ERROR YarnScheduler: Lost executor 30 on an-worker1123.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:24 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 30 for reason Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:24 WARN TaskSetManager: Lost task 1898.0 in stage 4.0 (TID 1909) (an-worker1123.eqiad.wmnet executor 30): ExecutorLostFailure (executor 30 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:24 WARN TaskSetManager: Lost task 1773.0 in stage 4.0 (TID 1872) (an-worker1123.eqiad.wmnet executor 30): ExecutorLostFailure (executor 30 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 ERROR YarnScheduler: Lost executor 104 on an-worker1105.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 104 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 WARN TaskSetManager: Lost task 2957.0 in stage 4.0 (TID 2111) (an-worker1105.eqiad.wmnet executor 104): ExecutorLostFailure (executor 104 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 WARN TaskSetManager: Lost task 2795.0 in stage 4.0 (TID 2041) (an-worker1105.eqiad.wmnet executor 104): ExecutorLostFailure (executor 104 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. 
Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 ERROR YarnScheduler: Lost executor 117 on an-worker1117.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 117 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 WARN TaskSetManager: Lost task 2858.0 in stage 4.0 (TID 2039) (an-worker1117.eqiad.wmnet executor 117): ExecutorLostFailure (executor 117 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:29 WARN TaskSetManager: Lost task 3290.0 in stage 4.0 (TID 2071) (an-worker1117.eqiad.wmnet executor 117): ExecutorLostFailure (executor 117 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:30 ERROR YarnScheduler: Lost executor 119 on an-worker1117.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:30 WARN TaskSetManager: Lost task 3404.0 in stage 4.0 (TID 2093) (an-worker1117.eqiad.wmnet executor 119): ExecutorLostFailure (executor 119 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:28:30 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 119 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:30 WARN TaskSetManager: Lost task 3582.0 in stage 4.0 (TID 2204) (an-worker1117.eqiad.wmnet executor 119): ExecutorLostFailure (executor 119 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:30 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 78 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:30 ERROR YarnScheduler: Lost executor 78 on an-worker1143.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:30 WARN TaskSetManager: Lost task 436.0 in stage 4.0 (TID 2147) (an-worker1143.eqiad.wmnet executor 78): ExecutorLostFailure (executor 78 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:30 WARN TaskSetManager: Lost task 442.0 in stage 4.0 (TID 2233) (an-worker1143.eqiad.wmnet executor 78): ExecutorLostFailure (executor 78 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:32 ERROR YarnScheduler: Lost executor 68 on an-worker1134.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:32 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 68 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:32 WARN TaskSetManager: Lost task 1212.0 in stage 4.0 (TID 2145) (an-worker1134.eqiad.wmnet executor 68): ExecutorLostFailure (executor 68 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:32 WARN TaskSetManager: Lost task 2957.1 in stage 4.0 (TID 2193) (an-worker1134.eqiad.wmnet executor 68): ExecutorLostFailure (executor 68 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:35 ERROR YarnScheduler: Lost executor 21 on an-worker1118.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:35 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 21 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:35 WARN TaskSetManager: Lost task 1941.0 in stage 4.0 (TID 2339) (an-worker1118.eqiad.wmnet executor 21): ExecutorLostFailure (executor 21 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:28:35 WARN TaskSetManager: Lost task 1934.0 in stage 4.0 (TID 2276) (an-worker1118.eqiad.wmnet executor 21): ExecutorLostFailure (executor 21 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:37 ERROR YarnScheduler: Lost executor 118 on an-worker1117.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:37 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 118 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:37 WARN TaskSetManager: Lost task 2321.0 in stage 4.0 (TID 2385) (an-worker1117.eqiad.wmnet executor 118): ExecutorLostFailure (executor 118 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:37 WARN TaskSetManager: Lost task 1716.0 in stage 4.0 (TID 2280) (an-worker1117.eqiad.wmnet executor 118): ExecutorLostFailure (executor 118 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:40 ERROR YarnScheduler: Lost executor 113 on an-worker1117.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:40 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 113 for reason Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:40 WARN TaskSetManager: Lost task 2738.0 in stage 4.0 (TID 2493) (an-worker1117.eqiad.wmnet executor 113): ExecutorLostFailure (executor 113 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:40 WARN TaskSetManager: Lost task 2322.0 in stage 4.0 (TID 2414) (an-worker1117.eqiad.wmnet executor 113): ExecutorLostFailure (executor 113 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:44 ERROR YarnScheduler: Lost executor 115 on an-worker1117.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:44 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 115 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:44 WARN TaskSetManager: Lost task 3055.0 in stage 4.0 (TID 2622) (an-worker1117.eqiad.wmnet executor 115): ExecutorLostFailure (executor 115 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:44 WARN TaskSetManager: Lost task 3352.0 in stage 4.0 (TID 2636) (an-worker1117.eqiad.wmnet executor 115): ExecutorLostFailure (executor 115 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. 
Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:45 ERROR YarnScheduler: Lost executor 10 on an-worker1146.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:45 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 10 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:45 WARN TaskSetManager: Lost task 1261.0 in stage 4.0 (TID 2783) (an-worker1146.eqiad.wmnet executor 10): ExecutorLostFailure (executor 10 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:45 WARN TaskSetManager: Lost task 1198.0 in stage 4.0 (TID 2760) (an-worker1146.eqiad.wmnet executor 10): ExecutorLostFailure (executor 10 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 ERROR YarnScheduler: Lost executor 85 on an-worker1079.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 85 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:28:53 WARN TaskSetManager: Lost task 1864.0 in stage 4.0 (TID 3053) (an-worker1079.eqiad.wmnet executor 85): ExecutorLostFailure (executor 85 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 WARN TaskSetManager: Lost task 3557.0 in stage 4.0 (TID 2860) (an-worker1079.eqiad.wmnet executor 85): ExecutorLostFailure (executor 85 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 ERROR YarnScheduler: Lost executor 42 on an-worker1118.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 42 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 WARN TaskSetManager: Lost task 2011.0 in stage 4.0 (TID 3070) (an-worker1118.eqiad.wmnet executor 42): ExecutorLostFailure (executor 42 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 WARN TaskSetManager: Lost task 2180.0 in stage 4.0 (TID 2573) (an-worker1118.eqiad.wmnet executor 42): ExecutorLostFailure (executor 42 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:28:53 ERROR YarnScheduler: Lost executor 105 on an-worker1082.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 WARN TaskSetManager: Lost task 3374.0 in stage 4.0 (TID 3127) (an-worker1082.eqiad.wmnet executor 105): ExecutorLostFailure (executor 105 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:53 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 105 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:28:54 WARN TaskSetManager: Lost task 3129.0 in stage 4.0 (TID 3043) (an-worker1082.eqiad.wmnet executor 105): ExecutorLostFailure (executor 105 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 ERROR YarnScheduler: Lost executor 59 on an-worker1107.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 59 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 WARN TaskSetManager: Lost task 3022.0 in stage 4.0 (TID 3455) (an-worker1107.eqiad.wmnet executor 59): ExecutorLostFailure (executor 59 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 WARN TaskSetManager: Lost task 3158.0 in stage 4.0 (TID 3496) (an-worker1107.eqiad.wmnet executor 59): ExecutorLostFailure (executor 59 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 ERROR YarnScheduler: Lost executor 84 on an-worker1119.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 84 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 WARN TaskSetManager: Lost task 3406.0 in stage 4.0 (TID 3574) (an-worker1119.eqiad.wmnet executor 84): ExecutorLostFailure (executor 84 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:06 WARN TaskSetManager: Lost task 3168.0 in stage 4.0 (TID 3502) (an-worker1119.eqiad.wmnet executor 84): ExecutorLostFailure (executor 84 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:09 ERROR YarnScheduler: Lost executor 14 on analytics1074.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:29:09 WARN TaskSetManager: Lost task 3485.0 in stage 4.0 (TID 3581) (analytics1074.eqiad.wmnet executor 14): ExecutorLostFailure (executor 14 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:09 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 14 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:09 WARN TaskSetManager: Lost task 3263.0 in stage 4.0 (TID 3526) (analytics1074.eqiad.wmnet executor 14): ExecutorLostFailure (executor 14 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:11 ERROR YarnScheduler: Lost executor 31 on an-worker1118.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:11 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 31 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:11 WARN TaskSetManager: Lost task 2560.0 in stage 4.0 (TID 3270) (an-worker1118.eqiad.wmnet executor 31): ExecutorLostFailure (executor 31 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:29:11 WARN TaskSetManager: Lost task 9.0 in stage 5.0 (TID 3689) (an-worker1118.eqiad.wmnet executor 31): ExecutorLostFailure (executor 31 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:19 ERROR YarnScheduler: Lost executor 129 on an-worker1103.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:19 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 129 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:19 WARN TaskSetManager: Lost task 2649.0 in stage 4.0 (TID 3313) (an-worker1103.eqiad.wmnet executor 129): ExecutorLostFailure (executor 129 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:19 WARN TaskSetManager: Lost task 258.0 in stage 5.0 (TID 3919) (an-worker1103.eqiad.wmnet executor 129): ExecutorLostFailure (executor 129 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:23 ERROR YarnScheduler: Lost executor 39 on an-worker1125.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:23 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 39 for reason Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:23 WARN TaskSetManager: Lost task 3335.0 in stage 4.0 (TID 3523) (an-worker1125.eqiad.wmnet executor 39): ExecutorLostFailure (executor 39 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:23 WARN TaskSetManager: Lost task 952.0 in stage 5.0 (TID 4135) (an-worker1125.eqiad.wmnet executor 39): ExecutorLostFailure (executor 39 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:33 ERROR YarnScheduler: Lost executor 67 on an-worker1140.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:33 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 67 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:33 WARN TaskSetManager: Lost task 3268.0 in stage 4.0 (TID 3536) (an-worker1140.eqiad.wmnet executor 67): ExecutorLostFailure (executor 67 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:29:33 WARN TaskSetManager: Lost task 664.0 in stage 5.0 (TID 4551) (an-worker1140.eqiad.wmnet executor 67): ExecutorLostFailure (executor 67 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. 
Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:03 ERROR YarnScheduler: Lost executor 25 on an-worker1141.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:03 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 25 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:03 WARN TaskSetManager: Lost task 3081.0 in stage 5.0 (TID 6055) (an-worker1141.eqiad.wmnet executor 25): ExecutorLostFailure (executor 25 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:03 WARN TaskSetManager: Lost task 3112.0 in stage 5.0 (TID 6070) (an-worker1141.eqiad.wmnet executor 25): ExecutorLostFailure (executor 25 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:17 ERROR YarnScheduler: Lost executor 36 on an-worker1095.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:17 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 36 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:30:17 WARN TaskSetManager: Lost task 2762.0 in stage 5.0 (TID 6821) (an-worker1095.eqiad.wmnet executor 36): ExecutorLostFailure (executor 36 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:17 WARN TaskSetManager: Lost task 2622.0 in stage 5.0 (TID 6795) (an-worker1095.eqiad.wmnet executor 36): ExecutorLostFailure (executor 36 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:24 ERROR YarnScheduler: Lost executor 18 on an-worker1111.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:24 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 18 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:24 WARN TaskSetManager: Lost task 3401.0 in stage 5.0 (TID 7165) (an-worker1111.eqiad.wmnet executor 18): ExecutorLostFailure (executor 18 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:24 WARN TaskSetManager: Lost task 3267.0 in stage 5.0 (TID 7138) (an-worker1111.eqiad.wmnet executor 18): ExecutorLostFailure (executor 18 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:30:25 ERROR YarnScheduler: Lost executor 27 on an-worker1078.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:25 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 27 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:25 WARN TaskSetManager: Lost task 3083.0 in stage 5.0 (TID 7101) (an-worker1078.eqiad.wmnet executor 27): ExecutorLostFailure (executor 27 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:30:25 WARN TaskSetManager: Lost task 3199.0 in stage 5.0 (TID 7121) (an-worker1078.eqiad.wmnet executor 27): ExecutorLostFailure (executor 27 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:32:51 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 102 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:32:51 ERROR YarnScheduler: Lost executor 102 on an-worker1105.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:32:51 WARN TaskSetManager: Lost task 10.0 in stage 6.0 (TID 7244) (an-worker1105.eqiad.wmnet executor 102): ExecutorLostFailure (executor 102 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:32:51 WARN TaskSetManager: Lost task 3.0 in stage 6.0 (TID 7237) (an-worker1105.eqiad.wmnet executor 102): ExecutorLostFailure (executor 102 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:33:05 ERROR YarnScheduler: Lost executor 50 on an-worker1107.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:33:05 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 50 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:33:05 WARN TaskSetManager: Lost task 5.0 in stage 6.0 (TID 7239) (an-worker1107.eqiad.wmnet executor 50): ExecutorLostFailure (executor 50 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:33:05 WARN TaskSetManager: Lost task 12.0 in stage 6.0 (TID 7246) (an-worker1107.eqiad.wmnet executor 50): ExecutorLostFailure (executor 50 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:33:49 ERROR YarnScheduler: Lost executor 65 on an-worker1108.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:33:49 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 65 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:33:49 WARN TaskSetManager: Lost task 13.0 in stage 6.0 (TID 7247) (an-worker1108.eqiad.wmnet executor 65): ExecutorLostFailure (executor 65 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:33:49 WARN TaskSetManager: Lost task 6.0 in stage 6.0 (TID 7240) (an-worker1108.eqiad.wmnet executor 65): ExecutorLostFailure (executor 65 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:34:30 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 7 for reason Container killed by YARN for exceeding physical memory limits. 4.5 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:34:30 ERROR YarnScheduler: Lost executor 7 on an-worker1089.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.5 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:34:30 WARN TaskSetManager: Lost task 333.0 in stage 6.0 (TID 7573) (an-worker1089.eqiad.wmnet executor 7): ExecutorLostFailure (executor 7 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.5 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:34:30 WARN TaskSetManager: Lost task 315.0 in stage 6.0 (TID 7555) (an-worker1089.eqiad.wmnet executor 7): ExecutorLostFailure (executor 7 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.5 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:34:55 ERROR YarnScheduler: Lost executor 240 on an-worker1148.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:34:55 WARN TaskSetManager: Lost task 446.0 in stage 6.0 (TID 7688) (an-worker1148.eqiad.wmnet executor 240): ExecutorLostFailure (executor 240 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:34:55 WARN TaskSetManager: Lost task 447.0 in stage 6.0 (TID 7689) (an-worker1148.eqiad.wmnet executor 240): ExecutorLostFailure (executor 240 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:34:55 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 240 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:17 ERROR YarnScheduler: Lost executor 283 on an-worker1135.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:17 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 283 for reason Container killed by YARN for exceeding physical memory limits. 
4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:17 WARN TaskSetManager: Lost task 399.0 in stage 6.0 (TID 7641) (an-worker1135.eqiad.wmnet executor 283): ExecutorLostFailure (executor 283 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:17 WARN TaskSetManager: Lost task 332.0 in stage 6.0 (TID 7572) (an-worker1135.eqiad.wmnet executor 283): ExecutorLostFailure (executor 283 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:21 ERROR YarnScheduler: Lost executor 226 on an-worker1120.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:21 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 226 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:21 WARN TaskSetManager: Lost task 436.0 in stage 6.0 (TID 7678) (an-worker1120.eqiad.wmnet executor 226): ExecutorLostFailure (executor 226 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:21 WARN TaskSetManager: Lost task 432.0 in stage 6.0 (TID 7674) (an-worker1120.eqiad.wmnet executor 226): ExecutorLostFailure (executor 226 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. 
Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:35 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 244 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:35 ERROR YarnScheduler: Lost executor 244 on an-worker1143.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:35 WARN TaskSetManager: Lost task 352.0 in stage 6.0 (TID 7592) (an-worker1143.eqiad.wmnet executor 244): ExecutorLostFailure (executor 244 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:35:35 WARN TaskSetManager: Lost task 328.0 in stage 6.0 (TID 7568) (an-worker1143.eqiad.wmnet executor 244): ExecutorLostFailure (executor 244 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:00 ERROR YarnScheduler: Lost executor 274 on an-worker1103.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:00 WARN TaskSetManager: Lost task 292.0 in stage 6.0 (TID 7532) (an-worker1103.eqiad.wmnet executor 274): ExecutorLostFailure (executor 274 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:36:00 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 274 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:15 ERROR YarnScheduler: Lost executor 196 on an-worker1115.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:15 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 196 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:15 WARN TaskSetManager: Lost task 447.1 in stage 6.0 (TID 7708) (an-worker1115.eqiad.wmnet executor 196): ExecutorLostFailure (executor 196 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:15 WARN TaskSetManager: Lost task 463.0 in stage 6.0 (TID 7705) (an-worker1115.eqiad.wmnet executor 196): ExecutorLostFailure (executor 196 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:21 ERROR YarnScheduler: Lost executor 214 on an-worker1124.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:21 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 214 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 
23/07/28 15:36:21 WARN TaskSetManager: Lost task 485.0 in stage 6.0 (TID 7729) (an-worker1124.eqiad.wmnet executor 214): ExecutorLostFailure (executor 214 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:21 WARN TaskSetManager: Lost task 479.0 in stage 6.0 (TID 7723) (an-worker1124.eqiad.wmnet executor 214): ExecutorLostFailure (executor 214 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:24 ERROR YarnScheduler: Lost executor 197 on an-worker1102.eqiad.wmnet: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:24 WARN YarnSchedulerBackend$YarnSchedulerEndpoint: Requesting driver to remove executor 197 for reason Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:24 WARN TaskSetManager: Lost task 455.0 in stage 6.0 (TID 7697) (an-worker1102.eqiad.wmnet executor 197): ExecutorLostFailure (executor 197 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead. 23/07/28 15:36:24 WARN TaskSetManager: Lost task 449.0 in stage 6.0 (TID 7691) (an-worker1102.eqiad.wmnet executor 197): ExecutorLostFailure (executor 197 exited caused by one of the running tasks) Reason: Container killed by YARN for exceeding physical memory limits. 4.4 GB of 4.4 GB physical memory used. Consider boosting spark.executor.memoryOverhead.
7197909125
## SHARED VALUES
def _distinct_value_ids(triples):
    """Return the distinct value ids found in the ``object`` column of *triples*.

    Keeps only rows whose ``object`` starts with ``value_prefix``, exposes that
    column as ``val_id``, de-duplicates it and marks the result for caching.
    """
    value_rows = triples.filter(col("object").startswith(value_prefix))
    value_ids = value_rows.select(col("object").alias("val_id")).distinct()
    return value_ids.cache()


# Value ids referenced by S.
V_ids_for_S = _distinct_value_ids(S_direct_triples)
# Value ids referenced by "not S".
# NOTE(review): this is built from S_direct_triples, exactly like V_ids_for_S,
# so the left-anti join below cannot return any row. Looks like a copy-paste
# slip -- confirm which DataFrame holds the "not S" triples and use it here.
V_ids_for_not_S = _distinct_value_ids(S_direct_triples)
# Value ids used by S that do not appear on the "not S" side.
V_only_for_S = V_ids_for_S.join(V_ids_for_not_S, on="val_id", how="leftanti")
# Triples for the shared values used by S: rows of `data` in the `onto_value`
# context whose subject is one of the value ids referenced by S.
# The final select keeps only the columns of `data`; presumably `data` was
# aliased as "data" upstream -- TODO confirm.
values_triples_used_by_S = (data
    .filter(col("context") == onto_value)
    .join(V_ids_for_S, V_ids_for_S["val_id"] == data["subject"], "inner")
    .select("data.*"))
# Triples for the shared values *only* used by S.
# Stored under its own name so it no longer overwrites
# values_triples_used_by_S, and joined on the val_id column of the DataFrame
# that is actually being joined (V_only_for_S) rather than V_ids_for_S.
values_triples_only_used_by_S = (data
    .filter(col("context") == onto_value)
    .join(V_only_for_S, V_only_for_S["val_id"] == data["subject"], "inner")
    .select("data.*"))
# TODO Print the counts
23/07/28 15:59:21 WARN CacheManager: Asked to cache already cached data. 23/07/28 15:59:21 WARN CacheManager: Asked to cache already cached data.
## TODO SHARED REFERENCES