| task,task_display,variant,variant_display,status,primary_metric,primary_value,higher_is_better,feature_dim,num_train,num_test,input_contract,target_variant,reason,accuracy,macro_f1,balanced_accuracy,num_classes,unseen_test_classes,unseen_test_class_count,mse,mae,r2,target_dim,micro_f1,exact_match,precision,recall,num_objects,mrr,top1_accuracy,top5_accuracy,top10_accuracy,median_rank,mean_rank,num_queries |
| timeline_action,Current Action Recognition,all_handcrafted_audio,All Current Features,computed,macro_f1,0.00905456968081885,true,8546,813,348,task contract feature blocks with handcrafted AAC audio where applicable (8546 dims),,,0.017241379310344827,0.00905456968081885,0.01720647773279352,19,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_action,Current Action Recognition,all_except_audio,All Except Audio,computed,macro_f1,0.008771929824561405,true,8378,813,348,same task contract with handcrafted AAC audio columns removed (8378 dims),,,0.020114942528735632,0.008771929824561405,0.005668016194331984,19,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_action,Current Action Recognition,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,macro_f1,0.006925207756232688,true,168,813,348,handcrafted AAC audio block only (168 dims),,,0.014367816091954023,0.006925207756232688,0.004048582995951417,19,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_action,Current Action Recognition,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,macro_f1,0.0,true,588,813,348,raw waveform log-mel embedding only (588 dims),,,0.0,0.0,0.0,19,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_action,Current Action Recognition,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,macro_f1,0.0013495276653171392,true,8966,813,348,task contract with handcrafted AAC removed and raw log-mel added (8966 dims),,,0.0028735632183908046,0.0013495276653171392,0.0008097165991902835,19,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_action,Current Action Recognition,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,macro_f1,0.002734107997265892,true,9134,813,348,task contract with existing handcrafted AAC plus raw log-mel (9134 dims),,,0.005747126436781609,0.002734107997265892,0.001619433198380567,19,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_subtask,Current Subtask Recognition,all_handcrafted_audio,All Current Features,computed,macro_f1,0.011256354393609296,true,8546,813,348,task contract feature blocks with handcrafted AAC audio where applicable (8546 dims),,,0.02586206896551724,0.011256354393609296,0.02788220551378446,15,"['Move bottle to coffee equipment', 'Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_subtask,Current Subtask Recognition,all_except_audio,All Except Audio,computed,macro_f1,0.0111731843575419,true,8378,813,348,same task contract with handcrafted AAC audio columns removed (8378 dims),,,0.040229885057471264,0.0111731843575419,0.017543859649122806,15,"['Move bottle to coffee equipment', 'Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_subtask,Current Subtask Recognition,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,macro_f1,0.016194331983805668,true,168,813,348,handcrafted AAC audio block only (168 dims),,,0.022988505747126436,0.016194331983805668,0.010796221322537112,15,"['Move bottle to coffee equipment', 'Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_subtask,Current Subtask Recognition,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,macro_f1,0.0016722408026755855,true,588,813,348,raw waveform log-mel embedding only (588 dims),,,0.0028735632183908046,0.0016722408026755855,0.001349527665317139,15,"['Move bottle to coffee equipment', 'Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_subtask,Current Subtask Recognition,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,macro_f1,0.0008257638315441783,true,8966,813,348,task contract with handcrafted AAC removed and raw log-mel added (8966 dims),,,0.0028735632183908046,0.0008257638315441783,0.0012531328320802004,15,"['Move bottle to coffee equipment', 'Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| timeline_subtask,Current Subtask Recognition,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,macro_f1,0.0017889087656529517,true,9134,813,348,task contract with existing handcrafted AAC plus raw log-mel (9134 dims),,,0.005747126436781609,0.0017889087656529517,0.002699055330634278,15,"['Move bottle to coffee equipment', 'Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| transition_detection,Action Transition Detection,all_handcrafted_audio,All Current Features,computed,macro_f1,0.46213292117465227,true,8546,813,348,task contract feature blocks with handcrafted AAC audio where applicable (8546 dims),,,0.8591954022988506,0.46213292117465227,0.4503012048192771,2,[],0,,,,,,,,,,,,,,,, |
| transition_detection,Action Transition Detection,all_except_audio,All Except Audio,computed,macro_f1,0.46870229007633585,true,8378,813,348,same task contract with handcrafted AAC audio columns removed (8378 dims),,,0.882183908045977,0.46870229007633585,0.4623493975903614,2,[],0,,,,,,,,,,,,,,,, |
| transition_detection,Action Transition Detection,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,macro_f1,0.48444444444444446,true,168,813,348,handcrafted AAC audio block only (168 dims),,,0.9396551724137931,0.48444444444444446,0.4924698795180723,2,[],0,,,,,,,,,,,,,,,, |
| transition_detection,Action Transition Detection,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,macro_f1,0.4637904468412942,true,588,813,348,raw waveform log-mel embedding only (588 dims),,,0.8649425287356322,0.4637904468412942,0.45331325301204817,2,[],0,,,,,,,,,,,,,,,, |
| transition_detection,Action Transition Detection,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,macro_f1,0.4792100707180375,true,8966,813,348,task contract with handcrafted AAC removed and raw log-mel added (8966 dims),,,0.853448275862069,0.4792100707180375,0.4770331325301205,2,[],0,,,,,,,,,,,,,,,, |
| transition_detection,Action Transition Detection,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,macro_f1,0.4816233470132239,true,9134,813,348,task contract with existing handcrafted AAC plus raw log-mel (9134 dims),,,0.8591954022988506,0.4816233470132239,0.48004518072289154,2,[],0,,,,,,,,,,,,,,,, |
| next_action,Next-Action Prediction,all_handcrafted_audio,All Current Features,computed,macro_f1,0.01058201058201058,true,8546,813,348,task contract feature blocks with handcrafted AAC audio where applicable (8546 dims),future action from annotation frame labels,,0.022988505747126436,0.01058201058201058,0.007407407407407408,18,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| next_action,Next-Action Prediction,all_except_audio,All Except Audio,computed,macro_f1,0.010709504685408301,true,8378,813,348,same task contract with handcrafted AAC audio columns removed (8378 dims),future action from annotation frame labels,,0.022988505747126436,0.010709504685408301,0.007407407407407408,18,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| next_action,Next-Action Prediction,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,macro_f1,0.008561643835616438,true,168,813,348,handcrafted AAC audio block only (168 dims),future action from annotation frame labels,,0.014367816091954023,0.008561643835616438,0.005208333333333333,18,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| next_action,Next-Action Prediction,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,macro_f1,0.0017301038062283738,true,588,813,348,raw waveform log-mel embedding only (588 dims),future action from annotation frame labels,,0.0028735632183908046,0.0017301038062283738,0.000980392156862745,18,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| next_action,Next-Action Prediction,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,macro_f1,0.006006006006006006,true,8966,813,348,task contract with handcrafted AAC removed and raw log-mel added (8966 dims),future action from annotation frame labels,,0.011494252873563218,0.006006006006006006,0.003703703703703704,18,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| next_action,Next-Action Prediction,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,macro_f1,0.0058479532163742695,true,9134,813,348,task contract with existing handcrafted AAC plus raw log-mel (9134 dims),future action from annotation frame labels,,0.011494252873563218,0.0058479532163742695,0.003703703703703704,18,"['Place item on table', 'Wait/Prepare for pouring', 'Pour coffee', 'Pour milk into coffee']",4,,,,,,,,,,,,,,,, |
| hand_trajectory_forecast,Future Hand Motion Forecasting,all_handcrafted_audio,All Current Features,computed,mae,4.466395378112793,false,8546,811,348,task contract feature blocks with handcrafted AAC audio where applicable (8546 dims),future hand joints from annotation.hdf5,,,,,,,,1557.2276611328125,4.466395378112793,-1588.9305967247317,1260,,,,,,,,,,,, |
| hand_trajectory_forecast,Future Hand Motion Forecasting,all_except_audio,All Except Audio,computed,mae,4.303755283355713,false,8378,811,348,same task contract with handcrafted AAC audio columns removed (8378 dims),future hand joints from annotation.hdf5,,,,,,,,1229.3953857421875,4.303755283355713,-1254.2135816756352,1260,,,,,,,,,,,, |
| hand_trajectory_forecast,Future Hand Motion Forecasting,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,mae,1.1956232786178589,false,168,811,348,handcrafted AAC audio block only (168 dims),future hand joints from annotation.hdf5,,,,,,,,3.1508853435516357,1.1956232786178589,-2.2170563799739673,1260,,,,,,,,,,,, |
| hand_trajectory_forecast,Future Hand Motion Forecasting,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,mae,3.1172122955322266,false,588,811,348,raw waveform log-mel embedding only (588 dims),future hand joints from annotation.hdf5,,,,,,,,63.60802459716797,3.1172122955322266,-63.94383890504609,1260,,,,,,,,,,,, |
| hand_trajectory_forecast,Future Hand Motion Forecasting,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,mae,4.305870532989502,false,8966,811,348,task contract with handcrafted AAC removed and raw log-mel added (8966 dims),future hand joints from annotation.hdf5,,,,,,,,1241.380126953125,4.305870532989502,-1266.4500920921807,1260,,,,,,,,,,,, |
| hand_trajectory_forecast,Future Hand Motion Forecasting,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,mae,4.1367621421813965,false,9134,811,348,task contract with existing handcrafted AAC plus raw log-mel (9134 dims),future hand joints from annotation.hdf5,,,,,,,,1217.459716796875,4.1367621421813965,-1242.0272923501784,1260,,,,,,,,,,,, |
| contact_prediction,Contact State Prediction,all_handcrafted_audio,All Current Features,computed,macro_f1,1.0,true,7503,813,348,task contract feature blocks with handcrafted AAC audio where applicable (7503 dims),,,1.0,1.0,1.0,1,[],0,,,,,,,,,,,,,,,, |
| contact_prediction,Contact State Prediction,all_except_audio,All Except Audio,computed,macro_f1,1.0,true,7335,813,348,same task contract with handcrafted AAC audio columns removed (7335 dims),,,1.0,1.0,1.0,1,[],0,,,,,,,,,,,,,,,, |
| contact_prediction,Contact State Prediction,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,macro_f1,1.0,true,168,813,348,handcrafted AAC audio block only (168 dims),,,1.0,1.0,1.0,1,[],0,,,,,,,,,,,,,,,, |
| contact_prediction,Contact State Prediction,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,macro_f1,1.0,true,588,813,348,raw waveform log-mel embedding only (588 dims),,,1.0,1.0,1.0,1,[],0,,,,,,,,,,,,,,,, |
| contact_prediction,Contact State Prediction,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,macro_f1,1.0,true,7923,813,348,task contract with handcrafted AAC removed and raw log-mel added (7923 dims),,,1.0,1.0,1.0,1,[],0,,,,,,,,,,,,,,,, |
| contact_prediction,Contact State Prediction,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,macro_f1,1.0,true,8091,813,348,task contract with existing handcrafted AAC plus raw log-mel (8091 dims),,,1.0,1.0,1.0,1,[],0,,,,,,,,,,,,,,,, |
| object_relevance,Relevant Object Prediction,all_handcrafted_audio,All Current Features,computed,micro_f1,0.15813953488372093,true,7650,813,348,task contract feature blocks with handcrafted AAC audio where applicable (7650 dims),,,,0.05335536055344564,,,,,,,,,0.15813953488372093,0.011494252873563218,0.15368567454798332,0.16285924834193072,34,,,,,,, |
| object_relevance,Relevant Object Prediction,all_except_audio,All Except Audio,computed,micro_f1,0.14793328498912256,true,7482,813,348,same task contract with handcrafted AAC audio columns removed (7482 dims),,,,0.05137956064750565,,,,,,,,,0.14793328498912256,0.008620689655172414,0.145610278372591,0.1503316138540899,34,,,,,,, |
| object_relevance,Relevant Object Prediction,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,micro_f1,0.15894039735099336,true,168,813,348,handcrafted AAC audio block only (168 dims),,,,0.0640376063191102,,,,,,,,,0.15894039735099336,0.005747126436781609,0.16859504132231404,0.1503316138540899,34,,,,,,, |
| object_relevance,Relevant Object Prediction,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,micro_f1,0.15894868585732164,true,588,813,348,raw waveform log-mel embedding only (588 dims),,,,0.06183171604594236,,,,,,,,,0.15894868585732164,0.0,0.13811854268624252,0.18717759764185704,34,,,,,,, |
| object_relevance,Relevant Object Prediction,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,micro_f1,0.17871759890859482,true,8070,813,348,task contract with handcrafted AAC removed and raw log-mel added (8070 dims),,,,0.05575181300519915,,,,,,,,,0.17871759890859482,0.0028735632183908046,0.16634920634920636,0.19307295504789979,34,,,,,,, |
| object_relevance,Relevant Object Prediction,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,micro_f1,0.18262653898768813,true,8238,813,348,task contract with existing handcrafted AAC plus raw log-mel (8238 dims),,,,0.05735108836010565,,,,,,,,,0.18262653898768813,0.0028735632183908046,0.17038927887683472,0.1967575534266765,34,,,,,,, |
| caption_grounding,Language-to-Time Grounding,all_handcrafted_audio,All Current Features,computed,mrr,0.03208567947149277,true,7650,813,348,task contract feature blocks with handcrafted AAC audio where applicable (7650 dims),,,,,,,,,,,,896,,,,,,0.03208567947149277,0.0028735632183908046,0.040229885057471264,0.06896551724137931,132.0,137.4022979736328,348 |
| caption_grounding,Language-to-Time Grounding,all_except_audio,All Except Audio,computed,mrr,0.027228528633713722,true,7482,813,348,same task contract with handcrafted AAC audio columns removed (7482 dims),,,,,,,,,,,,896,,,,,,0.027228528633713722,0.005747126436781609,0.028735632183908046,0.04597701149425287,134.0,142.62930297851562,348 |
| caption_grounding,Language-to-Time Grounding,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,mrr,0.03902389109134674,true,168,813,348,handcrafted AAC audio block only (168 dims),,,,,,,,,,,,896,,,,,,0.03902389109134674,0.011494252873563218,0.04885057471264368,0.07758620689655173,141.0,152.14942932128906,348 |
| caption_grounding,Language-to-Time Grounding,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,mrr,0.014815197326242924,true,588,813,348,raw waveform log-mel embedding only (588 dims),,,,,,,,,,,,896,,,,,,0.014815197326242924,0.0,0.005747126436781609,0.022988505747126436,164.5,168.51437377929688,348 |
| caption_grounding,Language-to-Time Grounding,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,mrr,0.02484782598912716,true,8070,813,348,task contract with handcrafted AAC removed and raw log-mel added (8070 dims),,,,,,,,,,,,896,,,,,,0.02484782598912716,0.0028735632183908046,0.022988505747126436,0.05172413793103448,120.5,137.83908081054688,348 |
| caption_grounding,Language-to-Time Grounding,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,mrr,0.02719014883041382,true,8238,813,348,task contract with existing handcrafted AAC plus raw log-mel (8238 dims),,,,,,,,,,,,896,,,,,,0.02719014883041382,0.005747126436781609,0.02586206896551724,0.05747126436781609,116.0,136.37930297851562,348 |
| cross_modal_retrieval,Cross-Modal Window Retrieval,all_handcrafted_audio,All Current Features,computed,mrr,0.3751238286495209,true,2415,813,348,task contract feature blocks with handcrafted AAC audio where applicable (2415 dims),,,,,,,,,,,,5096,,,,,,0.3751238286495209,0.26436781609195403,0.47988505747126436,0.5545977011494253,6.5,25.83333396911621,348 |
| cross_modal_retrieval,Cross-Modal Window Retrieval,all_except_audio,All Except Audio,computed,mrr,0.38921058177948,true,2247,813,348,same task contract with handcrafted AAC audio columns removed (2247 dims),,,,,,,,,,,,5096,,,,,,0.38921058177948,0.28448275862068967,0.4827586206896552,0.5718390804597702,6.0,25.27298927307129,348 |
| cross_modal_retrieval,Cross-Modal Window Retrieval,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,mrr,0.02334633097052574,true,168,813,348,handcrafted AAC audio block only (168 dims),,,,,,,,,,,,5096,,,,,,0.02334633097052574,0.005747126436781609,0.014367816091954023,0.031609195402298854,152.5,161.44540405273438,348 |
| cross_modal_retrieval,Cross-Modal Window Retrieval,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,mrr,0.01806792803108692,true,588,813,348,raw waveform log-mel embedding only (588 dims),,,,,,,,,,,,5096,,,,,,0.01806792803108692,0.0028735632183908046,0.008620689655172414,0.022988505747126436,162.5,165.3275909423828,348 |
| cross_modal_retrieval,Cross-Modal Window Retrieval,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,mrr,0.32749155163764954,true,2835,813,348,task contract with handcrafted AAC removed and raw log-mel added (2835 dims),,,,,,,,,,,,5096,,,,,,0.32749155163764954,0.22126436781609196,0.4367816091954023,0.514367816091954,9.0,32.57183837890625,348 |
| cross_modal_retrieval,Cross-Modal Window Retrieval,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,mrr,0.31795138120651245,true,3003,813,348,task contract with existing handcrafted AAC plus raw log-mel (3003 dims),,,,,,,,,,,,5096,,,,,,0.31795138120651245,0.20689655172413793,0.4396551724137931,0.5287356321839081,8.5,33.75,348 |
| modality_reconstruction,Sensor-to-Visual Reconstruction,all_handcrafted_audio,All Current Features,computed,mae,9.79421329498291,false,2415,813,348,task contract feature blocks with handcrafted AAC audio where applicable (2415 dims),,,,,,,,,13864.333984375,9.79421329498291,-0.6094599339266962,5096,,,,,,,,,,,, |
| modality_reconstruction,Sensor-to-Visual Reconstruction,all_except_audio,All Except Audio,computed,mae,10.446661949157715,false,2247,813,348,same task contract with handcrafted AAC audio columns removed (2247 dims),,,,,,,,,14634.3974609375,10.446661949157715,-0.6988538186806226,5096,,,,,,,,,,,, |
| modality_reconstruction,Sensor-to-Visual Reconstruction,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,mae,1.359641671180725,false,168,813,348,handcrafted AAC audio block only (168 dims),,,,,,,,,8681.8916015625,1.359641671180725,-0.007849137404613904,5096,,,,,,,,,,,, |
| modality_reconstruction,Sensor-to-Visual Reconstruction,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,mae,2.6225292682647705,false,588,813,348,raw waveform log-mel embedding only (588 dims),,,,,,,,,8708.6181640625,2.6225292682647705,-0.010951711094258076,5096,,,,,,,,,,,, |
| modality_reconstruction,Sensor-to-Visual Reconstruction,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,mae,8.830678939819336,false,2835,813,348,task contract with handcrafted AAC removed and raw log-mel added (2835 dims),,,,,,,,,12454.744140625,8.830678939819336,-0.44582574939092745,5096,,,,,,,,,,,, |
| modality_reconstruction,Sensor-to-Visual Reconstruction,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,mae,8.392388343811035,false,3003,813,348,task contract with existing handcrafted AAC plus raw log-mel (3003 dims),,,,,,,,,12078.8935546875,8.392388343811035,-0.4021946589830052,5096,,,,,,,,,,,, |
| temporal_order,Temporal Order Verification,all_handcrafted_audio,All Current Features,computed,macro_f1,0.5172413793103449,true,25638,1624,696,task contract feature blocks with handcrafted AAC audio where applicable (8546 dims),,,0.5172413793103449,0.5172413793103449,0.5172413793103449,2,[],0,,,,,,,,,,,,,,,, |
| temporal_order,Temporal Order Verification,all_except_audio,All Except Audio,computed,macro_f1,0.4942528735632184,true,25134,1624,696,same task contract with handcrafted AAC audio columns removed (8378 dims),,,0.4942528735632184,0.4942528735632184,0.4942528735632184,2,[],0,,,,,,,,,,,,,,,, |
| temporal_order,Temporal Order Verification,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,macro_f1,0.4425287356321839,true,504,1624,696,handcrafted AAC audio block only (168 dims),,,0.4425287356321839,0.4425287356321839,0.4425287356321839,2,[],0,,,,,,,,,,,,,,,, |
| temporal_order,Temporal Order Verification,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,macro_f1,0.5028735632183908,true,1764,1624,696,raw waveform log-mel embedding only (588 dims),,,0.5028735632183908,0.5028735632183908,0.5028735632183908,2,[],0,,,,,,,,,,,,,,,, |
| temporal_order,Temporal Order Verification,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,macro_f1,0.5301714439065678,true,26898,1624,696,task contract with handcrafted AAC removed and raw log-mel added (8966 dims),,,0.5301724137931034,0.5301714439065678,0.5301724137931034,2,[],0,,,,,,,,,,,,,,,, |
| temporal_order,Temporal Order Verification,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,macro_f1,0.5330450130569861,true,27402,1624,696,task contract with existing handcrafted AAC plus raw log-mel (9134 dims),,,0.5330459770114943,0.5330450130569861,0.5330459770114943,2,[],0,,,,,,,,,,,,,,,, |
| misalignment_detection,Cross-Modal Misalignment Detection,all_handcrafted_audio,All Current Features,computed,macro_f1,0.41734045375379186,true,7511,1614,692,motion/current visual+handcrafted audio pair,,,0.4725433526011561,0.41734045375379186,0.4725433526011561,2,[],0,,,,,,,,,,,,,,,, |
| misalignment_detection,Cross-Modal Misalignment Detection,all_except_audio,All Except Audio,computed,macro_f1,0.42258557365378524,true,7343,1614,692,motion/current visual pair with audio removed,,,0.47832369942196534,0.42258557365378524,0.4783236994219653,2,[],0,,,,,,,,,,,,,,,, |
| misalignment_detection,Cross-Modal Misalignment Detection,handcrafted_audio_only,Handcrafted AAC Audio Only,computed,macro_f1,0.5102351916376306,true,504,1614,692,handcrafted AAC audio self-alignment pair,,,0.5115606936416185,0.5102351916376306,0.5115606936416185,2,[],0,,,,,,,,,,,,,,,, |
| misalignment_detection,Cross-Modal Misalignment Detection,raw_logmel_audio_only,Raw Log-Mel Audio Only,computed,macro_f1,0.47823544277887897,true,1764,1614,692,raw log-mel audio self-alignment pair,,,0.47832369942196534,0.47823544277887897,0.4783236994219653,2,[],0,,,,,,,,,,,,,,,, |
| misalignment_detection,Cross-Modal Misalignment Detection,replace_handcrafted_with_raw,Replace AAC Block With Raw Log-Mel,computed,macro_f1,0.44378951880827355,true,7931,1614,692,motion/current visual pair with raw log-mel replacing handcrafted audio,,,0.4797687861271676,0.44378951880827355,0.47976878612716767,2,[],0,,,,,,,,,,,,,,,, |
| misalignment_detection,Cross-Modal Misalignment Detection,all_plus_raw_logmel,All Current Features + Raw Log-Mel,computed,macro_f1,0.4373795761078998,true,8099,1614,692,motion/current visual+handcrafted audio pair plus raw log-mel,,,0.4725433526011561,0.4373795761078998,0.4725433526011561,2,[],0,,,,,,,,,,,,,,,, |
|
|