https://huggingface.co/datasets/BAAI/Infinity-Instruct the 3M and 7M datasets boost HellaSwag, MMLU GSM8k..","text":"filtered from https://huggingface.co/datasets/BAAI/Infinity-Instruct the 3M and 7M datasets boost HellaSwag, MMLU GSM8k.."},"author":"arcee-ai","downloads":43,"gated":false,"id":"arcee-ai/infini-instruct-top-500k","lastModified":"2024-06-30T09:21:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":500000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":5,"isLikedByUser":false},{"_id":"67000150237c9f683e634330","position":2,"type":"dataset","author":"arcee-ai","downloads":381,"gated":false,"id":"arcee-ai/The-Tome","lastModified":"2024-08-15T16:08:27.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1752473,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":80,"isLikedByUser":false},{"_id":"670001609f008295fd85486b","position":3,"type":"dataset","author":"teknium","downloads":5116,"gated":false,"id":"teknium/OpenHermes-2.5","lastModified":"2024-04-15T08:18:12.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1001551,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":688,"isLikedByUser":false}],"position":1,"theme":"blue","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/instruct-datasets-66ffff8ac707c1fba9216d67","upvotes":0,"isUpvotedByUser":false},{"slug":"loubnabnl/filtering-the-web-with-llms-665c1d9f957df09a07bc52e3","title":"π Filtering the web with LLMs","description":"","lastUpdated":"2024-06-02T07:23:43.571Z","owner":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"items":[{"_id":"665c1dc693b937f83edbdad8","position":0,"type":"dataset","author":"HuggingFaceFW","downloads":408437,"gated":false,"id":"HuggingFaceFW/fineweb-edu","lastModified":"2024-10-11T07:55:10.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":3004505493,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":552,"isLikedByUser":false},{"_id":"665c1dd0987f055eff458792","position":1,"type":"model","author":"HuggingFaceFW","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62596f9e1c0a084224b93e00/EfmW5LH_nj0FCEZH7wH2p.png","fullname":"HuggingFaceFW","name":"HuggingFaceFW","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":154},"downloads":17070,"gated":false,"id":"HuggingFaceFW/fineweb-edu-classifier","inference":"not-popular-enough","lastModified":"2024-11-17T15:00:11.000Z","likes":142,"pipeline_tag":"text-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"665c1dd80d71762b86223e3f","position":2,"type":"model","author":"HuggingFaceFW","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62596f9e1c0a084224b93e00/EfmW5LH_nj0FCEZH7wH2p.png","fullname":"HuggingFaceFW","name":"HuggingFaceFW","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":154},"downloads":656,"gated":false,"id":"HuggingFaceFW/ablation-model-fineweb-edu","inference":"not-popular-enough","lastModified":"2024-06-11T12:00:27.000Z","likes":11,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"665c1de124cc1676f9c682e8","position":3,"type":"dataset","author":"math-ai","downloads":24294,"gated":false,"id":"math-ai/AutoMathText","lastModified":"2024-10-30T21:19:01.000Z"
,"datasetsServerInfo":{"viewer":"viewer-partial","numRows":7400558,"libraries":[],"formats":[],"modalities":["text"]},"private":false,"repoType":"dataset","likes":156,"isLikedByUser":false}],"position":3,"theme":"pink","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/filtering-the-web-with-llms-665c1d9f957df09a07bc52e3","upvotes":1,"isUpvotedByUser":false},{"slug":"loubnabnl/synthetic-textbooks-65df5d704ae91dc5bccfc051","title":"π Synthetic textbooks","description":"Synthetically generated textbooks","lastUpdated":"2024-06-02T07:23:43.576Z","owner":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"items":[{"_id":"65df5d9f35a76aec7148848b","position":0,"type":"dataset","author":"HuggingFaceTB","downloads":4154,"gated":false,"id":"HuggingFaceTB/cosmopedia","lastModified":"2024-08-12T22:05:49.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":31064744,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":567,"isLikedByUser":false},{"_id":"65df5da6701fe639ba18c82f","position":1,"type":"dataset","author":"Locutusque","downloads":1494,"gated":false,"id":"Locutusque/UltraTextbooks","lastModified":"2024-02-02T15:24:22.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5523999,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":194,"isLikedByUser":false},{"_id":"65df5e0f2705d9672f3cfdd7","position":2,"type":"model","author":"microsoft","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1583646260758-5e64858c87403103f9f1055d.png","fullname":"Microsoft","name":"microsoft","type":"org","isHf":false,"isM
od":false,"isEnterprise":false,"followerCount":5652},"downloads":199885,"gated":false,"id":"microsoft/phi-2","inference":"cold","lastModified":"2024-04-29T16:25:56.000Z","likes":3249,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"65df5e13a3103cc636728f95","position":3,"type":"model","author":"HuggingFaceTB","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/e4VK7uW5sTeCYupD0s_ob.png","fullname":"Hugging Face TB Research","name":"HuggingFaceTB","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":610},"downloads":856,"gated":false,"id":"HuggingFaceTB/cosmo-1b","inference":"not-popular-enough","lastModified":"2024-07-08T14:47:31.000Z","likes":128,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"position":4,"theme":"blue","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/synthetic-textbooks-65df5d704ae91dc5bccfc051","upvotes":2,"isUpvotedByUser":false},{"slug":"loubnabnl/code-generation-65df5c21dea2916244966bee","title":"β¨ Code Generation","description":"Code generation models and datassets!","lastUpdated":"2024-06-02T07:23:43.572Z","owner":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"items":[{"_id":"65df5c36661345f8484dfa27","position":0,"type":"model","author":"bigcode","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":986},"downloads":24823,"gated":false,"id":"bigcode/starcoder2-15b","inference":"warm","lastModified":"2024-06-05T19:52:45.000Z","likes":570,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"65df5c8ddea2916244968d14","position":1,"type":"dataset","author":"bigcode","downloads":8649,"gated":"auto","id":"bigcode/the-stack","lastModified":"2023-04-13T12:15:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":545547422,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":745,"isLikedByUser":false},{"_id":"65df5c3eea95a4896bfe00fe","position":2,"type":"model","author":"bigcode","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":986},"downloads":338685,"gated":false,"id":"bigcode/starcoder2-3b","inference":"warm","lastModified":"2024-03-04T13:33:12.000Z","likes":153,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"65df5c84a3103cc636722b8b","position":3,"type":"model","author":"bigcode","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"follo
werCount":986},"downloads":13559,"gated":"auto","id":"bigcode/starcoder","inference":"warm","lastModified":"2024-10-08T20:53:18.000Z","likes":2814,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"position":5,"theme":"purple","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/code-generation-65df5c21dea2916244966bee","upvotes":5,"isUpvotedByUser":false}],"datasets":[{"author":"loubnabnl","downloads":77,"gated":false,"id":"loubnabnl/generations_dataset_sysprompt","lastModified":"2024-09-20T15:37:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":53,"gated":false,"id":"loubnabnl/gens-360M-temp7-v2","lastModified":"2024-08-18T01:22:04.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":59,"gated":false,"id":"loubnabnl/gens-360M-v2","lastModified":"2024-08-17T21:56:05.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":61,"gated":false,"id":"loubnabnl/gens-135M-v2","lastModified":"2024-08-17T21:45:08.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":56,"gated":false,"id":"loubnabnl/generations_dataset","lastModified":"2024-08-17T11:28:10.000Z"
,"datasetsServerInfo":{"viewer":"viewer","numRows":40,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":25,"gated":false,"id":"loubnabnl/example-generations","lastModified":"2024-08-15T22:31:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":547,"libraries":["datasets","mlcroissant"],"formats":["text"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":42,"gated":false,"id":"loubnabnl/bisac_subset","lastModified":"2024-04-18T13:12:28.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":150,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":53,"gated":false,"id":"loubnabnl/wiki_test","lastModified":"2024-04-12T14:03:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":2,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":54,"gated":false,"id":"loubnabnl/test","lastModified":"2024-04-12T14:02:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":19055,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":42,"gated":false,"id":"loubnabnl/comsop_450_samples_detailed","lastModified":"2024-03-14T22:48:54.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":450,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downlo
ads":41,"gated":false,"id":"loubnabnl/evaluation_prompts_hl","lastModified":"2024-03-06T19:14:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":42642,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/cosmopedia_3generations_temp_1","lastModified":"2024-03-06T18:47:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":450,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":41,"gated":false,"id":"loubnabnl/cosmopedia_50_per_seed","lastModified":"2024-03-06T17:12:07.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":450,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/cosmopedia_50_per_seed_x3","lastModified":"2024-03-06T14:49:21.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1350,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":41,"gated":false,"id":"loubnabnl/failed","lastModified":"2024-02-16T12:07:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/test_amt","lastModified":"2024-02-16T11:01:36.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":0,"libraries":["datasets","pa
ndas","mlcroissant","polars"],"formats":["parquet"],"modalities":[]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":37,"gated":false,"id":"loubnabnl/stories_oh_children","lastModified":"2024-02-16T10:27:23.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/stories_oh_problem","lastModified":"2024-02-16T10:26:59.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/math_college","lastModified":"2024-02-15T20:18:46.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/math_gradeschool","lastModified":"2024-02-15T20:18:17.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/sample_jupyter_structured","lastModified":"2024-02-14T00:29:16.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/sample_jupyter
","lastModified":"2024-02-14T00:25:38.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/sample_kaggle","lastModified":"2024-02-14T00:21:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/data_ultrachat","lastModified":"2024-01-16T15:15:03.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/code_data","lastModified":"2023-12-19T18:00:22.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1002,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/stackexchange_data","lastModified":"2023-11-23T12:42:58.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":96,"gated":false,"id":"loubnabnl/kaggle_scripts_new_format_subset","lastModified":"2023-11-09T11:55:10.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1160428,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/old_py","lastModified":
"2023-10-10T12:16:21.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/old_python","lastModified":"2023-10-10T11:53:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":42509,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":194,"gated":false,"id":"loubnabnl/test_kaggle_2","lastModified":"2023-10-05T16:08:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":713947,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/test_kaggle_3","lastModified":"2023-10-05T15:12:14.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":70392,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/kaggle-data","lastModified":"2023-09-20T11:14:03.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/clean_prs2","lastModified":"2023-09-15T17:58:59.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":10000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":6,"gated":false,"id":"loubnabnl/notebook-renamed","lastModified":"2023-09-14T14:25:52.000Z","private":
false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":40,"gated":false,"id":"loubnabnl/prs-v2-sample","lastModified":"2023-09-14T12:55:12.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":10000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":11,"gated":false,"id":"loubnabnl/dummy_1","lastModified":"2023-08-31T10:14:51.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":42,"gated":false,"id":"loubnabnl/humaneval_plus","lastModified":"2023-08-30T20:10:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":164,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/wizardcoder-python-34b-generations","lastModified":"2023-08-29T13:46:38.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/gpt4-1k-annotations","lastModified":"2023-08-23T14:47:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/llama-10k-annotations","lastModified":"2023-08-23T11:46:43.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":9983,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":78,"gated":false,"id":"loubnabnl/textbooks-filtering-600-samples","lastModi
fied":"2023-08-22T22:18:37.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":6000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":41,"gated":false,"id":"loubnabnl/starcoderdata_py_smol","lastModified":"2023-08-22T20:16:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":129320,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":4,"gated":false,"id":"loubnabnl/kaggle-code-data","lastModified":"2023-08-18T08:49:46.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":18952,"gated":false,"id":"loubnabnl/repo-images","lastModified":"2023-08-17T16:43:55.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":20,"libraries":["datasets","mlcroissant"],"formats":["imagefolder"],"modalities":["image"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":4,"gated":false,"id":"loubnabnl/octocoder_generations","lastModified":"2023-08-16T23:01:06.000Z","datasetsServerInfo":{"viewer":"preview","numRows":0,"libraries":[],"formats":[],"modalities":[]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/llama2_ranks_the_stack","lastModified":"2023-08-10T14:15:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1500,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/community_submissions_ld","lastModified":"2023-08-02T08:53:02.000Z","private":false,"repoType":"dataset","likes":
0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/multiple-codegeex-completions","lastModified":"2023-07-31T08:26:38.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":4,"gated":false,"id":"loubnabnl/multiple-py","lastModified":"2023-06-13T12:44:23.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/multiple-preds-new","lastModified":"2023-04-12T13:14:55.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":161,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/odex-data","lastModified":"2023-04-12T12:10:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":439,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":"manual","id":"loubnabnl/odex-test","lastModified":"2023-04-12T12:07:13.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/eval_harness_vs_multipl-e","lastModified":"2023-04-06T17:30:16.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":161,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/ada_key_merge_subset","lastModified":"2023-03-28T13:41:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":580,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"datas
et","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/pii_checks_data_elm","lastModified":"2023-03-27T15:46:49.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/mathematica_checks","lastModified":"2023-03-27T13:51:11.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5440,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/dockerfile_checks","lastModified":"2023-03-27T13:50:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":137651,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/makefile_checks","lastModified":"2023-03-27T13:50:26.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":57421,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/common-lisp_checks","lastModified":"2023-03-27T13:49:47.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":31455,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/coffeescript_checks","lastModified":"2023-03-27T13:49:34.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":23874,
"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/emacs-lisp_checks","lastModified":"2023-03-27T13:49:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":19261,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/clojure_checks","lastModified":"2023-03-27T13:42:07.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":14174,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/rmarkdown_checks","lastModified":"2023-03-27T13:41:43.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":3493,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/ada-no-pii_checks","lastModified":"2023-03-27T11:48:03.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":10886,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/issues_content_500k","lastModified":"2023-03-09T09:14:35.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":500000,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":6,"gated":false,"i
d":"loubnabnl/scaling-laws-params","lastModified":"2023-03-08T18:21:04.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":116,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["csv"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/jupyter_python_max_line_length_1000","lastModified":"2023-03-02T17:46:11.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":174,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/github-issues","lastModified":"2023-03-01T23:13:01.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/large-text-issues","lastModified":"2023-03-01T19:20:20.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":163,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":38,"gated":false,"id":"loubnabnl/comments_preceding_bots","lastModified":"2023-02-24T19:50:14.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":183,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/pre-processed-issues","lastModified":"2023-02-24T16:53:22.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":6759,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","d
ownloads":32,"gated":false,"id":"loubnabnl/preprocessed-issues","lastModified":"2023-02-24T14:54:29.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":7351,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/bot_issues","lastModified":"2023-02-24T13:32:00.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1155,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/bigcode-data-stats","lastModified":"2023-02-01T14:25:41.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":40,"gated":false,"id":"loubnabnl/data_toloka","lastModified":"2022-12-21T01:39:36.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":108,"libraries":["datasets","dask","mlcroissant"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/data-filtering-statistics","lastModified":"2022-12-14T13:32:36.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":13,"gated":false,"id":"loubnabnl/rho-loss-dataset","lastModified":"2022-12-08T11:03:02.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":392756,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":48,"gated":false,"id":"loubnabnl/stack-filtered-pii-1M-java","lastModified":"2022-12-06T01:28:17.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000000,"li
braries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/python_comment_code_ratio_08","lastModified":"2022-11-23T16:02:40.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":131,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/dummy_data_clean","lastModified":"2022-11-09T17:05:43.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":400,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/code-generations-bigcode","lastModified":"2022-10-25T15:53:42.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":94,"gated":false,"id":"loubnabnl/humaneval_infilling","lastModified":"2022-10-21T10:37:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":8652,"libraries":["datasets","mlcroissant"],"formats":[],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/language_detection_in_code","lastModified":"2022-10-13T16:08:47.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":2000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":6,"gated":false,"id":"loubnabnl/language_id_bigcode","lastModified":"2022-10-11T15:00:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":8000,"li
braries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":7,"gated":false,"id":"loubnabnl/bigcode_csharp","lastModified":"2022-10-10T23:35:32.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":81533,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["json"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":8,"gated":false,"id":"loubnabnl/github_jupyter_parsed_2","lastModified":"2022-09-19T08:36:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":451662,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":56,"gated":false,"id":"loubnabnl/github-code-small-filtering","lastModified":"2022-07-11T23:18:05.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":61538687,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":87,"gated":false,"id":"loubnabnl/github-small-near-dedup","lastModified":"2022-07-08T10:28:11.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":48618295,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":106,"gated":false,"id":"loubnabnl/github-code-clean-small","lastModified":"2022-07-06T09:58:52.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":70590990,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author
":"loubnabnl","downloads":127,"gated":false,"id":"loubnabnl/github-code-more-filtering","lastModified":"2022-06-30T22:28:37.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":99562534,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":758,"gated":false,"id":"loubnabnl/github-clean-v1","lastModified":"2022-06-28T00:59:02.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":31279484,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":1575,"gated":false,"id":"loubnabnl/github-code-duplicate","lastModified":"2022-06-27T20:02:27.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":115086922,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":37,"gated":false,"id":"loubnabnl/github-clean","lastModified":"2022-06-25T11:36:42.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":622,"gated":false,"id":"loubnabnl/tokenized-github-code-python","lastModified":"2022-04-28T00:13:55.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":7226626,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":[]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":696,"gated":false,"id":"loubnabnl/tokenized-codeparrot-train","lastModified":"2022-04-22T16:19:47.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5300000,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"is
LikedByUser":false},{"author":"loubnabnl","downloads":16,"gated":false,"id":"loubnabnl/tokenized-codeparrot-valid","lastModified":"2022-04-22T14:29:06.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":61373,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false}],"models":[{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/test","inference":"library-not-detected","lastModified":"2024-10-16T13:35:05.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":7,"gated":false,"id":"loubnabnl/smollm-135M-instruct-v2","inference":"library-not-detected","lastModified":"2024-08-26T08:06:28.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-1.7B-instruct-v2","inference":"library-not-detected","lastModified":"2024-08-23T10:47:17.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":9,"gated":false,"id":"loubnabnl/smollm-360M-instruct-add-basics-w-math","inference":"library-not-detected","lastModified":"2024-08-13T15:47:25.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-135M-instruct-add-basics-w-math","inference":"library-not-detected","lastModified":"2024-08-13T13:28:18.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-350M-instruct-add-basics-eq","inference":"library-not-detected","lastModified":"2024-08-13T00:22:55.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":8,"gated":false,"id":"loubnabnl/smollm-350M-instruct-add-basics-only","inference":"library-not-detected","lastModified":"2024-08-13T00:21:42.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":7,"gated":false,"id":"loubnabnl/SmolLM-360M-4bit","inference":"not-popular-enough","lastModified":"2024-08-12T21:52:46.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":7,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test2-noOH","inference":"library-not-detected","lastModified":"2024-08-12T17:24:45.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test2-ep2","inference":"library-not-detected","lastModified":"2024-08-12T16:39:34.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":11,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test2","inference":"library-not-detected","lastModified":"2024-08-12T16:00:33.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test","inference":"library-not-detected","lastModified":"2024-08-06T15:36:09.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":3,"gated":false,"id":"loubnabnl/outputs","inference":"pipeline-not-detected","lastModified":"2024-02-27T12:35:45.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/starcoder_tokenizer","inference":"library-not-detected","lastModified":"2023-12-22T16:09:31.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/CodeLlama-7b-hf","inference":"not-popular-enough","lastModified":"2023-08-28T17:07:20.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/codellama-7b","inference":"library-not-detected","lastModified":"2023-08-28T16:50:15.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/model-1b-debug","inference":"library-not-detected","lastModified":"2023-06-22T10:07:12.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/hub_logs","inference":"library-not-detected","lastModified":"2023-06-21T16:38:01.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/starcoder-1b","inference":"not-popular-enough","lastModified":"2023-06-21T10:38:33.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/tokenizer-slimpajama","inference":"library-not-detected","lastModified":"2023-06-19T10:49:12.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/tokenizer-pile","inference":"library-not-detected","lastModified":"2023-06-19T10:34:23.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/starcoder-5b-noconf","inference":"library-not-detected","lastModified":"2023-06-15T12:34:39.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/starcoder-tokenizer","inference":"library-not-detected","lastModified":"2023-05-23T08:50:02.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/megatron-tp2","inference":"library-not-detected","lastModified":"2023-03-21T21:52:56.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/sl-test-1","inference":"library-not-detected","lastModified":"2023-03-11T14:27:46.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":17,"gated":false,"id":"loubnabnl/santacoder-393B-tokens","inference":"custom-code","lastModified":"2023-03-08T14:00:32.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":22,"gated":false,"id":"loubnabnl/santacoder-code-to-text","inference":"custom-code","lastModified":"2023-02-02T10:16:02.000Z","likes":5,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/santacoder-finetuned-the-stack-bash-3","inference":"library-not-detected","lastModified":"2023-01-22T00:46:34.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":16,"gated":false,"id":"loubnabnl/rho-loss-baseline-model","inference":"not-popular-enough","lastModified":"2022-12-09T12:45:04.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":17,"gated":false,"id":"loubnabnl/apps-1.5B-model","inference":"not-popular-enough","lastModified":"2022-07-28T15:40:49.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/codeparrot-small-multi-small-near-dedup","inference":"not-popular-enough","lastModified":"2022-07-18T09:20:36.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/codeparrot-small-near-dedup","inference":"not-popular-enough","lastModified":"2022-06-18T20:59:13.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":18,"gated":false,"id":"loubnabnl/codeparrot-small-scale","inference":"not-popular-enough","lastModified":"2022-05-15T14:34:14.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"numberLikes":89,"papers":[{"id":"2406.17557","title":"The FineWeb Datasets: Decanting the Web for the Finest Text Data at\n Scale","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2406.17557.png","upvotes":86,"publishedAt":"2024-06-25T13:50:56.000Z","isUpvotedByUser":false},{"id":"2405.18392","title":"Scaling Laws and Compute-Optimal Training Beyond Fixed Training\n Durations","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2405.18392.png","upvotes":12,"publishedAt":"2024-05-28T17:33:54.000Z","isUpvotedByUser":false},{"id":"2402.19173","title":"StarCoder 2 and The Stack v2: The Next Generation","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2402.19173.png","upvotes":136,"publishedAt":"2024-02-29T13:53:35.000Z","isUpvotedByUser":false},{"id":"2305.06161","title":"StarCoder: may the source be with you!","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2305.06161.png","upvotes":30,"publishedAt":"2023-05-09T08:16:42.000Z","isUpvotedByUser":false},{"id":"2303.03915","title":"The BigScience ROOTS Corpus: A 1.6TB Composite Multilingual Dataset","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2303.03915.png","upvotes":6,"publishedAt":"2023-03-07T14:25:44.000Z","isUpvotedByUser":false},{"id":"2301.03988","title":"SantaCoder: don't reach for the stars!","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2301.03988.png","upvotes":7,"publishedAt":"2023-01-09T10:52:35.000Z","isUpvotedByUser":false},{"id":"2211.15533","title":"The 
Stack: 3 TB of permissively licensed source code","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2211.15533.png","upvotes":5,"publishedAt":"2022-11-20T18:15:30.000Z","isUpvotedByUser":false},{"id":"2211.05100","title":"BLOOM: A 176B-Parameter Open-Access Multilingual Language Model","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2211.05100.png","upvotes":28,"publishedAt":"2022-11-09T18:48:09.000Z","isUpvotedByUser":false}],"posts":[{"slug":"547206198374677","content":[{"type":"text","value":"Making SmolLM2 reproducible: open-sourcing our training & evaluation toolkit π οΈ ","raw":"Making SmolLM2 reproducible: open-sourcing our training & evaluation toolkit π οΈ "},{"type":"link","href":"https://github.com/huggingface/smollm/","raw":"https://github.com/huggingface/smollm/"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Pre-training code with nanotron","raw":"- Pre-training code with nanotron"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Evaluation suite with lighteval","raw":"- Evaluation suite with lighteval"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Synthetic data generation using distilabel (powers our new SFT dataset ","raw":"- Synthetic data generation using distilabel (powers our new SFT dataset "},{"type":"resource","resource":{"type":"dataset","id":"HuggingFaceTB/smoltalk"},"url":"https://huggingface.co/datasets/HuggingFaceTB/smoltalk","raw":"https://huggingface.co/datasets/HuggingFaceTB/smoltalk"},{"type":"text","value":")","raw":")"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Post-training scripts with TRL & the alignment handbook","raw":"- Post-training scripts with TRL & the alignment handbook"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- On-device tools with llama.cpp for summarization, rewriting & agents","raw":"- On-device tools with llama.cpp for summarization, rewriting & 
agents"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Apache 2.0 licensed. V2 pre-training data mix coming soon!","raw":"Apache 2.0 licensed. V2 pre-training data mix coming soon!"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Which other tools should we add next?","raw":"Which other tools should we add next?"}],"rawContent":"Making SmolLM2 reproducible: open-sourcing our training & evaluation toolkit π οΈ https://github.com/huggingface/smollm/\n\n- Pre-training code with nanotron\n- Evaluation suite with lighteval\n- Synthetic data generation using distilabel (powers our new SFT dataset https://huggingface.co/datasets/HuggingFaceTB/smoltalk)\n- Post-training scripts with TRL & the alignment handbook\n- On-device tools with llama.cpp for summarization, rewriting & agents\n\nApache 2.0 licensed. V2 pre-training data mix coming soon!\n\nWhich other tools should we add next?","author":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374,"isFollowing":false},"attachments":[],"mentions":[],"reactions":[{"reaction":"π₯","users":["reach-vb","John6666","not-lain","AtAndDev","Joseph717171","vansin"],"count":6},{"reaction":"π€","users":["Joseph717171"],"count":1}],"publishedAt":"2024-11-24T16:00:22.000Z","updatedAt":"2024-11-24T16:01:53.795Z","commentators":[],"url":"/posts/loubnabnl/547206198374677","totalUniqueImpressions":1360,"numComments":0},{"slug":"634384490754714","content":[{"type":"text","value":"π· FineWeb technical report is out and so is π FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarksΒ such as MMLU, ARC, and OpenBookQA.","raw":"π· FineWeb technical report is out 
and so is π FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarksΒ such as MMLU, ARC, and OpenBookQA."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Technical report: ","raw":"Technical report: "},{"type":"resource","resource":{"type":"space","id":"HuggingFaceFW/blogpost-fineweb-v1"},"url":"https://hf.co/spaces/HuggingFaceFW/blogpost-fineweb-v1","raw":"https://hf.co/spaces/HuggingFaceFW/blogpost-fineweb-v1"},{"type":"text","value":" ","raw":" "},{"type":"new_line","raw":"\n"},{"type":"text","value":"Dataset: ","raw":"Dataset: "},{"type":"resource","resource":{"type":"dataset","id":"HuggingFaceFW/fineweb-edu"},"url":"https://hf.co/datasets/HuggingFaceFW/fineweb-edu","raw":"https://hf.co/datasets/HuggingFaceFW/fineweb-edu"},{"type":"text","value":" ","raw":" "},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"We used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens. ","raw":"We used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens. 
"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"You can find more details about the dataset and the experiments we ran in the FineWeb technical report, It's a 45-minute read but it contains all the secret sauce for building high quality web datasets.","raw":"You can find more details about the dataset and the experiments we ran in the FineWeb technical report, It's a 45-minute read but it contains all the secret sauce for building high quality web datasets."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Enjoy!","raw":"Enjoy!"}],"rawContent":"π· FineWeb technical report is out and so is π FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarksΒ such as MMLU, ARC, and OpenBookQA.\n\nTechnical report: https://hf.co/spaces/HuggingFaceFW/blogpost-fineweb-v1 \nDataset: https://hf.co/datasets/HuggingFaceFW/fineweb-edu \n\nWe used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens. 
\n\nYou can find more details about the dataset and the experiments we ran in the FineWeb technical report, It's a 45-minute read but it contains all the secret sauce for building high quality web datasets.\n\nEnjoy!","author":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374,"isFollowing":false},"attachments":[{"type":"image","url":"https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/zlFOO3Gh5zPpJ-vjvSMEs.png"}],"mentions":[],"reactions":[{"reaction":"π₯","users":["mmhamdy","alielfilali01","GPT007","arjunguha","hiauiarau","nicolay-r","privategeek24","Ariel323","maywell","guipenedo","neuralink","asaduzzaman319"],"count":12},{"reaction":"π","users":["dillfrescott","guipenedo","neuralink"],"count":3},{"reaction":"π§ ","users":["neuralink","louisbrulenaudet"],"count":2}],"publishedAt":"2024-06-02T08:15:54.000Z","updatedAt":"2024-06-02T08:16:14.466Z","commentators":[],"url":"/posts/loubnabnl/634384490754714","totalUniqueImpressions":5105,"numComments":0}],"totalPosts":4,"spaces":[{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"green","colorTo":"purple","createdAt":"2024-08-15T21:51:03.000Z","emoji":"π€","id":"loubnabnl/test-smollm","lastModified":"2024-08-16T14:29:44.000Z","likes":1,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-test-smollm.hf.space","isCustom":false,"stage":"READY"}],"sha":"546fa514c1327e57c03d4f41cfccd8a0be637ccd"},"title":"Smol Playground","isLikedByUser":false,"originSpace":{"name":"HuggingFaceTB/instant-smollm","author":{"_id":"648a374f00f7a3374ee64b99","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/648a374f00f7a3374ee64b99/YPwSOrronoozwHbJchPn3.jpeg","fullname":"Caleb Fahlgren","name":"cfahlgren1","type":"user","isPro":true,"isHf":true,"isMod":false,"followerCount":134}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"indigo","colorTo":"green","createdAt":"2024-05-03T10:28:35.000Z","emoji":"π","id":"loubnabnl/inspect_sanchit_annotations","lastModified":"2024-05-03T10:29:05.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-inspect-sanchit-annotations.hf.space","isCustom":false,"stage":"READY"}]},"title":"Inspect Sanchit 
Anotations","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"blue","colorTo":"pink","createdAt":"2024-03-28T21:01:21.000Z","emoji":"π’","id":"loubnabnl/zero-gpu","lastModified":"2024-04-22T09:47:47.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"zero-a10g"},"storage":null,"gcTimeout":172800,"errorMessage":"Launch timed out, workload was not healthy after 30 min","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-zero-gpu.hf.space","isCustom":false,"stage":"READY"}]},"title":"Zero Gpu","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"purple","colorTo":"blue","createdAt":"2024-03-07T15:42:22.000Z","emoji":"πΈοΈ","id":"loubnabnl/inspect_selfcheck","lastModified":"2024-03-13T15:56:41.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"errorMessage":"","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-inspect-selfcheck.hf.space","isCustom":false,"stage":"READY"}]},"title":"selfcheck","isLikedByUser":false,"originSpace":{"name":"HuggingFaceTB/inspect_cosmopedia","author":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/e4VK7uW5sTeCYupD0s_ob.png","fullname":"Hugging Face TB Research","name":"HuggingFaceTB","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":610}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"gray","colorTo":"gray","createdAt":"2023-09-13T07:53:48.000Z","emoji":"β€οΈ","id":"loubnabnl/Nt3awnou-rescue-map2","lastModified":"2023-09-13T23:38:21.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"errorMessage":"Launch timed out, space was not healthy after 30 min","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-nt3awnou-rescue-map2.hf.space","isCustom":false,"stage":"READY"}]},"title":"Nt3awnu 
Map","isLikedByUser":false,"originSpace":{"name":"nt3awnou/Nt3awnou-rescue-map","author":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5ff8c9f4b2035d9a81a859f7/PCXLFCzqijRtBxma4JdsJ.png","fullname":"Nt3awnou","name":"nt3awnou","type":"org","isHf":false,"isMod":false,"isEnterprise":false,"followerCount":27}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"purple","colorTo":"purple","createdAt":"2023-03-30T13:26:06.000Z","emoji":"π₯","id":"loubnabnl/diff-visualizer","lastModified":"2023-08-27T11:35:52.000Z","likes":1,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-diff-visualizer.hf.space","isCustom":false,"stage":"READY"}]},"title":"Diff Visualizer","isLikedByUser":false,"originSpace":{"name":"SaulLu/diff-visualizer","author":{"_id":"60741a2e69a66931a0273f0c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1618938489629-60741a2e69a66931a0273f0c.png","fullname":"Lucile Saulnier","name":"SaulLu","type":"user","isPro":false,"isHf":false,"isMod":false,"followerCount":119}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"red","colorTo":"blue","createdAt":"2022-12-29T10:43:47.000Z","emoji":"π¬","id":"loubnabnl/the-stack-bot","lastModified":"2023-02-27T16:46:00.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"errorMessage":"Traceback (most recent call last):\n File \"/home/user/.local/bin/streamlit\", line 5, in \n from streamlit.web.cli import main\n File \"/home/user/.local/lib/python3.8/site-packages/streamlit/__init__.py\", line 55, in \n from streamlit.delta_generator import DeltaGenerator as _DeltaGenerator\n File \"/home/user/.local/lib/python3.8/site-packages/streamlit/delta_generator.py\", line 45, in \n from streamlit.elements.arrow_altair import ArrowAltairMixin\n File \"/home/user/.local/lib/python3.8/site-packages/streamlit/elements/arrow_altair.py\", line 35, in \n from altair.vegalite.v4.api import Chart\nModuleNotFoundError: No module named 'altair.vegalite.v4'\n","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-the-stack-bot.hf.space","isCustom":false,"stage":"READY"}]},"title":"The Stack Bot","isLikedByUser":false}],"u":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","isPro":false,"fullname":"Loubna Ben Allal","user":"loubnabnl","orgs":[{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1583856921041-5dd96eb166059660ed1ee413.png","fullname":"Hugging Face","name":"huggingface","userRole":"write","type":"org","isHf":true},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1634806038075-5df7e9e5da6d0311fd3d53f9.png","fullname":"BigScience 
Workshop","name":"bigscience","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1637946025573-5fbfd09ee366524fe8e97cd3.webp","fullname":"BigScience Catalogue Data","name":"bigscience-catalogue-data","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1642714419598-5fbfd09ee366524fe8e97cd3.webp","fullname":"BigScience Data","name":"bigscience-data","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61140e35d08630d2676c9829/HdUBhsZwa2fJq6pP0pM0H.png","fullname":"HuggingFaceBR4","name":"HuggingFaceBR4","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/6e5f8ee29ce0e7becbbe45fd7ee8bffc?d=retro&size=100","fullname":"Team 8","name":"Team8","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1655756383598-61c141342aac764ce1654e43.png","fullname":"CodeParrot","name":"codeparrot","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/j26aNEdiOgptZxJ6akGCC.png","fullname":"Hugging Face H4","name":"HuggingFaceH4","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1663704750230-5dd96eb166059660ed1ee413.png","fullname":"CompVis Community","name":"compvis-community","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1663191953132-61c141342aac764ce1654e43.png","fullname":"BigCode 
Data","name":"bigcode-data","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/b7f53b5386493a352a5dd4b01d2e0ade?d=retro&size=100","fullname":"LocalCodeLLMs","name":"local-code-llms","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/KXY7ApNHoUzmZsDM3Vchx.png","fullname":"Need4Speed","name":"need-for-speed","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5ff5d596f244529b3ec0fb89/d3KMtMG5-XAS2uhbp82Qz.png","fullname":"Code Llama","name":"codellama","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/e4VK7uW5sTeCYupD0s_ob.png","fullname":"Hugging Face TB Research","name":"HuggingFaceTB","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/bd6f32283561374ab3e454f334600863?d=retro&size=100","fullname":"Hugging Face Smol Cluster","name":"HFSmolCluster","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5ff8c9f4b2035d9a81a859f7/PCXLFCzqijRtBxma4JdsJ.png","fullname":"Nt3awnou","name":"nt3awnou","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6032802e1f993496bc14d9e3/3s4vnB6_xPaPPWcZkF6jT.jpeg","fullname":"huggingPartyParis","name":"HuggingPartyParis","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png","fullname":"Qwen","name":"Qwen","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f17f0a0925b9863e28ad517/exBCcWH8McHg1hVQAtvN5.png","fullname":"ZeroGPU 
Explorers","name":"zero-gpu-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/703e31cb0f692d47af3f21b7b73ca944?d=retro&size=100","fullname":"HF AFAIK","name":"afaik-hf","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/1f34d5d65a1686192504327df19cb60b?d=retro&size=100","fullname":"gg-hf","name":"gg-hf","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/4d40163d9b45c74754e0919aab851407?d=retro&size=100","fullname":"Nanotron Research","name":"nanotron","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61d354424d8b92469b15345c/TSO3ZwxLYEcg7QNJsr3I5.png","fullname":"Women on Hugging Face","name":"WomenonHuggingFace","userRole":"contributor","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/R0pIOsiMiw3YnfGvtqwna.png","fullname":"Hugging Face SMOL","name":"HuggingFaceSmol","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62596f9e1c0a084224b93e00/EfmW5LH_nj0FCEZH7wH2p.png","fullname":"HuggingFaceFW","name":"HuggingFaceFW","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/1aa77d2e6163c9a360c2dcb928488af8?d=retro&size=100","fullname":"bigcode nvidia","name":"bigcode-nvidia","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f17f0a0925b9863e28ad517/nxmdd6m86cxu55UZBlQeg.jpeg","fullname":"Social Post Explorers","name":"social-post-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f17f0a0925b9863e28ad517/V8fnWFEWwXTgCQuIHnPmk.png","fullname":"Dev Mode 
Explorers","name":"dev-mode-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/adbb6fae1cbe225aaaa9db237369dbc3?d=retro&size=100","fullname":"Cosmopedia Stories Collab","name":"cosmopedia-stories-collab","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/602e6dee60e3dd96631c906e/DjTUNJIAMsFzrjzm84Dr-.png","fullname":"StarCoder2 Data","name":"starcoder2data","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/b8e6a19d52546ec99766d15d3402212c?d=retro&size=100","fullname":"Data Agents","name":"data-agents","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/60f2fc91b92afccb7c34b8ed/ndW0EQekNd4krxBkQ-S4W.png","fullname":"Argilla Warehouse","name":"argilla-warehouse","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/fa4ba2b868f91d84f74f7db74133c5c3?d=retro&size=100","fullname":"smol-explorers","name":"smol-explorers","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/6da47922b9be34c97ecbcba7bb557264?d=retro&size=100","fullname":"swissai-hf-data","name":"swissai-hf-data","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/oges-i7Dd9Fs0FDrh3bEs.png","fullname":"Hugging Face Science","name":"science","userRole":"write","type":"org","isHf":false}],"signup":{"github":"loubnabnl","twitter":"LoubnaBenAllal1","details":"LLMs, ML for code, Synthetic 
data","homepage":"https://loubnabnl.github.io/","bluesky":"loubnanl.bsky.social","linkedin":""},"isHf":true,"isMod":false,"type":"user"},"upvotes":12,"repoFilterModels":{"sortKey":"modified"},"repoFilterDatasets":{"sortKey":"modified"},"repoFilterSpaces":{"sortKey":"modified"},"numFollowers":2374,"numFollowing":57,"isFollowing":false,"isFollower":false,"sampleFollowers":[{"user":"MackinationsAi","fullname":"AiM","type":"user","_id":"662207ab89fc1bc582895ab0","isPro":true,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/9FdtdYBfHv__g1YcRGXEr.png"},{"user":"Annduril","fullname":"Luis Alfonso Gutierrez","type":"user","_id":"6366b5e96604a4fee8571fec","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6366b5e96604a4fee8571fec/-3HqRLsfOARNf0-XP61jD.jpeg"},{"user":"luckywu720","fullname":"lucky WU","type":"user","_id":"66014011fe3a7a542cd6ea84","isPro":false,"avatarUrl":"/avatars/06c51a005b87c7e3ec36dc9ea2d7cd51.svg"},{"user":"Profrandom","fullname":"Alexander Koelnberger","type":"user","_id":"671f6ab61925c496300ed8ab","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/7gXO_bHTxyJO6GefQ5Id5.png"}],"isWatching":false,"hardwareItems":[],"acceptLanguages":["en","*"]}">
- Pre-training code with nanotron - Evaluation suite with lighteval - Synthetic data generation using distilabel (powers our new SFT dataset HuggingFaceTB/smoltalk) - Post-training scripts with TRL & the alignment handbook - On-device tools with llama.cpp for summarization, rewriting & agents
Apache 2.0 licensed. V2 pre-training data mix coming soon!
🍷 FineWeb technical report is out and so is 📚 FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarks such as MMLU, ARC, and OpenBookQA.
We used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens.
You can find more details about the dataset and the experiments we ran in the FineWeb technical report. It's a 45-minute read, but it contains all the secret sauce for building high-quality web datasets.