https://huggingface.co/datasets/BAAI/Infinity-Instruct the 3M and 7M datasets boost HellaSwag, MMLU GSM8k..","text":"filtered from https://huggingface.co/datasets/BAAI/Infinity-Instruct the 3M and 7M datasets boost HellaSwag, MMLU GSM8k.."},"author":"arcee-ai","downloads":43,"gated":false,"id":"arcee-ai/infini-instruct-top-500k","lastModified":"2024-06-30T09:21:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":500000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":5,"isLikedByUser":false},{"_id":"67000150237c9f683e634330","position":2,"type":"dataset","author":"arcee-ai","downloads":381,"gated":false,"id":"arcee-ai/The-Tome","lastModified":"2024-08-15T16:08:27.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1752473,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":80,"isLikedByUser":false},{"_id":"670001609f008295fd85486b","position":3,"type":"dataset","author":"teknium","downloads":5116,"gated":false,"id":"teknium/OpenHermes-2.5","lastModified":"2024-04-15T08:18:12.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1001551,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":688,"isLikedByUser":false}],"position":1,"theme":"blue","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/instruct-datasets-66ffff8ac707c1fba9216d67","upvotes":0,"isUpvotedByUser":false},{"slug":"loubnabnl/filtering-the-web-with-llms-665c1d9f957df09a07bc52e3","title":"πŸ“š Filtering the web with LLMs","description":"","lastUpdated":"2024-06-02T07:23:43.571Z","owner":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"items":[{"_id":"665c1dc693b937f83edbdad8","position":0,"type":"dataset","author":"HuggingFaceFW","downloads":408437,"gated":false,"id":"HuggingFaceFW/fineweb-edu","lastModified":"2024-10-11T07:55:10.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":3004505493,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":552,"isLikedByUser":false},{"_id":"665c1dd0987f055eff458792","position":1,"type":"model","author":"HuggingFaceFW","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62596f9e1c0a084224b93e00/EfmW5LH_nj0FCEZH7wH2p.png","fullname":"HuggingFaceFW","name":"HuggingFaceFW","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":154},"downloads":17070,"gated":false,"id":"HuggingFaceFW/fineweb-edu-classifier","inference":"not-popular-enough","lastModified":"2024-11-17T15:00:11.000Z","likes":142,"pipeline_tag":"text-classification","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"665c1dd80d71762b86223e3f","position":2,"type":"model","author":"HuggingFaceFW","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62596f9e1c0a084224b93e00/EfmW5LH_nj0FCEZH7wH2p.png","fullname":"HuggingFaceFW","name":"HuggingFaceFW","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":154},"downloads":656,"gated":false,"id":"HuggingFaceFW/ablation-model-fineweb-edu","inference":"not-popular-enough","lastModified":"2024-06-11T12:00:27.000Z","likes":11,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"665c1de124cc1676f9c682e8","position":3,"type":"dataset","author":"math-ai","downloads":24294,"gated":false,"id":"math-ai/AutoMathText","lastModified":"2024-10-30T21:19:01.000Z"
,"datasetsServerInfo":{"viewer":"viewer-partial","numRows":7400558,"libraries":[],"formats":[],"modalities":["text"]},"private":false,"repoType":"dataset","likes":156,"isLikedByUser":false}],"position":3,"theme":"pink","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/filtering-the-web-with-llms-665c1d9f957df09a07bc52e3","upvotes":1,"isUpvotedByUser":false},{"slug":"loubnabnl/synthetic-textbooks-65df5d704ae91dc5bccfc051","title":"🌌 Synthetic textbooks","description":"Synthetically generated textbooks","lastUpdated":"2024-06-02T07:23:43.576Z","owner":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"items":[{"_id":"65df5d9f35a76aec7148848b","position":0,"type":"dataset","author":"HuggingFaceTB","downloads":4154,"gated":false,"id":"HuggingFaceTB/cosmopedia","lastModified":"2024-08-12T22:05:49.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":31064744,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":567,"isLikedByUser":false},{"_id":"65df5da6701fe639ba18c82f","position":1,"type":"dataset","author":"Locutusque","downloads":1494,"gated":false,"id":"Locutusque/UltraTextbooks","lastModified":"2024-02-02T15:24:22.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5523999,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":194,"isLikedByUser":false},{"_id":"65df5e0f2705d9672f3cfdd7","position":2,"type":"model","author":"microsoft","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1583646260758-5e64858c87403103f9f1055d.png","fullname":"Microsoft","name":"microsoft","type":"org","isHf":false,"isM
od":false,"isEnterprise":false,"followerCount":5652},"downloads":199885,"gated":false,"id":"microsoft/phi-2","inference":"cold","lastModified":"2024-04-29T16:25:56.000Z","likes":3249,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"65df5e13a3103cc636728f95","position":3,"type":"model","author":"HuggingFaceTB","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/e4VK7uW5sTeCYupD0s_ob.png","fullname":"Hugging Face TB Research","name":"HuggingFaceTB","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":610},"downloads":856,"gated":false,"id":"HuggingFaceTB/cosmo-1b","inference":"not-popular-enough","lastModified":"2024-07-08T14:47:31.000Z","likes":128,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"position":4,"theme":"blue","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/synthetic-textbooks-65df5d704ae91dc5bccfc051","upvotes":2,"isUpvotedByUser":false},{"slug":"loubnabnl/code-generation-65df5c21dea2916244966bee","title":"✨ Code Generation","description":"Code generation models and datassets!","lastUpdated":"2024-06-02T07:23:43.572Z","owner":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"items":[{"_id":"65df5c36661345f8484dfa27","position":0,"type":"model","author":"bigcode","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":986},"downloads":24823,"gated":false,"id":"bigcode/starcoder2-15b","inference":"warm","lastModified":"2024-06-05T19:52:45.000Z","likes":570,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"65df5c8ddea2916244968d14","position":1,"type":"dataset","author":"bigcode","downloads":8649,"gated":"auto","id":"bigcode/the-stack","lastModified":"2023-04-13T12:15:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":545547422,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":745,"isLikedByUser":false},{"_id":"65df5c3eea95a4896bfe00fe","position":2,"type":"model","author":"bigcode","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":986},"downloads":338685,"gated":false,"id":"bigcode/starcoder2-3b","inference":"warm","lastModified":"2024-03-04T13:33:12.000Z","likes":153,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"_id":"65df5c84a3103cc636722b8b","position":3,"type":"model","author":"bigcode","authorData":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"follo
werCount":986},"downloads":13559,"gated":"auto","id":"bigcode/starcoder","inference":"warm","lastModified":"2024-10-08T20:53:18.000Z","likes":2814,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"position":5,"theme":"purple","private":false,"shareUrl":"https://huggingface.co/collections/loubnabnl/code-generation-65df5c21dea2916244966bee","upvotes":5,"isUpvotedByUser":false}],"datasets":[{"author":"loubnabnl","downloads":77,"gated":false,"id":"loubnabnl/generations_dataset_sysprompt","lastModified":"2024-09-20T15:37:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":53,"gated":false,"id":"loubnabnl/gens-360M-temp7-v2","lastModified":"2024-08-18T01:22:04.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":59,"gated":false,"id":"loubnabnl/gens-360M-v2","lastModified":"2024-08-17T21:56:05.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":61,"gated":false,"id":"loubnabnl/gens-135M-v2","lastModified":"2024-08-17T21:45:08.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":41,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":56,"gated":false,"id":"loubnabnl/generations_dataset","lastModified":"2024-08-17T11:28:10.000Z"
,"datasetsServerInfo":{"viewer":"viewer","numRows":40,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":25,"gated":false,"id":"loubnabnl/example-generations","lastModified":"2024-08-15T22:31:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":547,"libraries":["datasets","mlcroissant"],"formats":["text"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":42,"gated":false,"id":"loubnabnl/bisac_subset","lastModified":"2024-04-18T13:12:28.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":150,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":53,"gated":false,"id":"loubnabnl/wiki_test","lastModified":"2024-04-12T14:03:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":2,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":54,"gated":false,"id":"loubnabnl/test","lastModified":"2024-04-12T14:02:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":19055,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":42,"gated":false,"id":"loubnabnl/comsop_450_samples_detailed","lastModified":"2024-03-14T22:48:54.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":450,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downlo
ads":41,"gated":false,"id":"loubnabnl/evaluation_prompts_hl","lastModified":"2024-03-06T19:14:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":42642,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/cosmopedia_3generations_temp_1","lastModified":"2024-03-06T18:47:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":450,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":41,"gated":false,"id":"loubnabnl/cosmopedia_50_per_seed","lastModified":"2024-03-06T17:12:07.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":450,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/cosmopedia_50_per_seed_x3","lastModified":"2024-03-06T14:49:21.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1350,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":41,"gated":false,"id":"loubnabnl/failed","lastModified":"2024-02-16T12:07:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/test_amt","lastModified":"2024-02-16T11:01:36.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":0,"libraries":["datasets","pa
ndas","mlcroissant","polars"],"formats":["parquet"],"modalities":[]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":37,"gated":false,"id":"loubnabnl/stories_oh_children","lastModified":"2024-02-16T10:27:23.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/stories_oh_problem","lastModified":"2024-02-16T10:26:59.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/math_college","lastModified":"2024-02-15T20:18:46.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/math_gradeschool","lastModified":"2024-02-15T20:18:17.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/sample_jupyter_structured","lastModified":"2024-02-14T00:29:16.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/sample_jupyter
","lastModified":"2024-02-14T00:25:38.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/sample_kaggle","lastModified":"2024-02-14T00:21:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/data_ultrachat","lastModified":"2024-01-16T15:15:03.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/code_data","lastModified":"2023-12-19T18:00:22.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1002,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/stackexchange_data","lastModified":"2023-11-23T12:42:58.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":96,"gated":false,"id":"loubnabnl/kaggle_scripts_new_format_subset","lastModified":"2023-11-09T11:55:10.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1160428,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/old_py","lastModified":
"2023-10-10T12:16:21.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/old_python","lastModified":"2023-10-10T11:53:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":42509,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":194,"gated":false,"id":"loubnabnl/test_kaggle_2","lastModified":"2023-10-05T16:08:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":713947,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/test_kaggle_3","lastModified":"2023-10-05T15:12:14.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":70392,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/kaggle-data","lastModified":"2023-09-20T11:14:03.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/clean_prs2","lastModified":"2023-09-15T17:58:59.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":10000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":6,"gated":false,"id":"loubnabnl/notebook-renamed","lastModified":"2023-09-14T14:25:52.000Z","private":
false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":40,"gated":false,"id":"loubnabnl/prs-v2-sample","lastModified":"2023-09-14T12:55:12.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":10000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":11,"gated":false,"id":"loubnabnl/dummy_1","lastModified":"2023-08-31T10:14:51.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":42,"gated":false,"id":"loubnabnl/humaneval_plus","lastModified":"2023-08-30T20:10:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":164,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/wizardcoder-python-34b-generations","lastModified":"2023-08-29T13:46:38.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/gpt4-1k-annotations","lastModified":"2023-08-23T14:47:39.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/llama-10k-annotations","lastModified":"2023-08-23T11:46:43.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":9983,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":78,"gated":false,"id":"loubnabnl/textbooks-filtering-600-samples","lastModi
fied":"2023-08-22T22:18:37.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":6000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":41,"gated":false,"id":"loubnabnl/starcoderdata_py_smol","lastModified":"2023-08-22T20:16:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":129320,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":4,"gated":false,"id":"loubnabnl/kaggle-code-data","lastModified":"2023-08-18T08:49:46.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":18952,"gated":false,"id":"loubnabnl/repo-images","lastModified":"2023-08-17T16:43:55.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":20,"libraries":["datasets","mlcroissant"],"formats":["imagefolder"],"modalities":["image"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":4,"gated":false,"id":"loubnabnl/octocoder_generations","lastModified":"2023-08-16T23:01:06.000Z","datasetsServerInfo":{"viewer":"preview","numRows":0,"libraries":[],"formats":[],"modalities":[]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/llama2_ranks_the_stack","lastModified":"2023-08-10T14:15:48.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1500,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/community_submissions_ld","lastModified":"2023-08-02T08:53:02.000Z","private":false,"repoType":"dataset","likes":
0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/multiple-codegeex-completions","lastModified":"2023-07-31T08:26:38.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":4,"gated":false,"id":"loubnabnl/multiple-py","lastModified":"2023-06-13T12:44:23.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/multiple-preds-new","lastModified":"2023-04-12T13:14:55.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":161,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/odex-data","lastModified":"2023-04-12T12:10:50.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":439,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":"manual","id":"loubnabnl/odex-test","lastModified":"2023-04-12T12:07:13.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/eval_harness_vs_multipl-e","lastModified":"2023-04-06T17:30:16.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":161,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/ada_key_merge_subset","lastModified":"2023-03-28T13:41:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":580,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"datas
et","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":36,"gated":false,"id":"loubnabnl/pii_checks_data_elm","lastModified":"2023-03-27T15:46:49.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/mathematica_checks","lastModified":"2023-03-27T13:51:11.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5440,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/dockerfile_checks","lastModified":"2023-03-27T13:50:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":137651,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":32,"gated":false,"id":"loubnabnl/makefile_checks","lastModified":"2023-03-27T13:50:26.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":57421,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/common-lisp_checks","lastModified":"2023-03-27T13:49:47.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":31455,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/coffeescript_checks","lastModified":"2023-03-27T13:49:34.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":23874,
"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/emacs-lisp_checks","lastModified":"2023-03-27T13:49:33.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":19261,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/clojure_checks","lastModified":"2023-03-27T13:42:07.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":14174,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/rmarkdown_checks","lastModified":"2023-03-27T13:41:43.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":3493,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/ada-no-pii_checks","lastModified":"2023-03-27T11:48:03.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":10886,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/issues_content_500k","lastModified":"2023-03-09T09:14:35.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":500000,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":6,"gated":false,"i
d":"loubnabnl/scaling-laws-params","lastModified":"2023-03-08T18:21:04.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":116,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["csv"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/jupyter_python_max_line_length_1000","lastModified":"2023-03-02T17:46:11.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":174,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":3,"gated":false,"id":"loubnabnl/github-issues","lastModified":"2023-03-01T23:13:01.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":33,"gated":false,"id":"loubnabnl/large-text-issues","lastModified":"2023-03-01T19:20:20.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":163,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":38,"gated":false,"id":"loubnabnl/comments_preceding_bots","lastModified":"2023-02-24T19:50:14.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":183,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/pre-processed-issues","lastModified":"2023-02-24T16:53:22.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":6759,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","d
ownloads":32,"gated":false,"id":"loubnabnl/preprocessed-issues","lastModified":"2023-02-24T14:54:29.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":7351,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/bot_issues","lastModified":"2023-02-24T13:32:00.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1155,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/bigcode-data-stats","lastModified":"2023-02-01T14:25:41.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":40,"gated":false,"id":"loubnabnl/data_toloka","lastModified":"2022-12-21T01:39:36.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":108,"libraries":["datasets","dask","mlcroissant"],"formats":["json"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/data-filtering-statistics","lastModified":"2022-12-14T13:32:36.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":13,"gated":false,"id":"loubnabnl/rho-loss-dataset","lastModified":"2022-12-08T11:03:02.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":392756,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":48,"gated":false,"id":"loubnabnl/stack-filtered-pii-1M-java","lastModified":"2022-12-06T01:28:17.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":1000000,"li
braries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":35,"gated":false,"id":"loubnabnl/python_comment_code_ratio_08","lastModified":"2022-11-23T16:02:40.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":131,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":34,"gated":false,"id":"loubnabnl/dummy_data_clean","lastModified":"2022-11-09T17:05:43.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":400,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":31,"gated":false,"id":"loubnabnl/code-generations-bigcode","lastModified":"2022-10-25T15:53:42.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":94,"gated":false,"id":"loubnabnl/humaneval_infilling","lastModified":"2022-10-21T10:37:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":8652,"libraries":["datasets","mlcroissant"],"formats":[],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":5,"gated":false,"id":"loubnabnl/language_detection_in_code","lastModified":"2022-10-13T16:08:47.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":2000,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":6,"gated":false,"id":"loubnabnl/language_id_bigcode","lastModified":"2022-10-11T15:00:19.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":8000,"li
braries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":7,"gated":false,"id":"loubnabnl/bigcode_csharp","lastModified":"2022-10-10T23:35:32.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":81533,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["json"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":8,"gated":false,"id":"loubnabnl/github_jupyter_parsed_2","lastModified":"2022-09-19T08:36:13.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":451662,"libraries":["datasets","pandas","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":56,"gated":false,"id":"loubnabnl/github-code-small-filtering","lastModified":"2022-07-11T23:18:05.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":61538687,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":87,"gated":false,"id":"loubnabnl/github-small-near-dedup","lastModified":"2022-07-08T10:28:11.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":48618295,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":106,"gated":false,"id":"loubnabnl/github-code-clean-small","lastModified":"2022-07-06T09:58:52.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":70590990,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author
":"loubnabnl","downloads":127,"gated":false,"id":"loubnabnl/github-code-more-filtering","lastModified":"2022-06-30T22:28:37.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":99562534,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":2,"isLikedByUser":false},{"author":"loubnabnl","downloads":758,"gated":false,"id":"loubnabnl/github-clean-v1","lastModified":"2022-06-28T00:59:02.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":31279484,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":1575,"gated":false,"id":"loubnabnl/github-code-duplicate","lastModified":"2022-06-27T20:02:27.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":115086922,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["text"]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":37,"gated":false,"id":"loubnabnl/github-clean","lastModified":"2022-06-25T11:36:42.000Z","private":false,"repoType":"dataset","likes":0,"isLikedByUser":false},{"author":"loubnabnl","downloads":622,"gated":false,"id":"loubnabnl/tokenized-github-code-python","lastModified":"2022-04-28T00:13:55.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":7226626,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":[]},"private":false,"repoType":"dataset","likes":1,"isLikedByUser":false},{"author":"loubnabnl","downloads":696,"gated":false,"id":"loubnabnl/tokenized-codeparrot-train","lastModified":"2022-04-22T16:19:47.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":5300000,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"is
LikedByUser":false},{"author":"loubnabnl","downloads":16,"gated":false,"id":"loubnabnl/tokenized-codeparrot-valid","lastModified":"2022-04-22T14:29:06.000Z","datasetsServerInfo":{"viewer":"viewer","numRows":61373,"libraries":["datasets","dask","mlcroissant","polars"],"formats":["parquet"],"modalities":["tabular","text"]},"private":false,"repoType":"dataset","likes":0,"isLikedByUser":false}],"models":[{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/test","inference":"library-not-detected","lastModified":"2024-10-16T13:35:05.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":7,"gated":false,"id":"loubnabnl/smollm-135M-instruct-v2","inference":"library-not-detected","lastModified":"2024-08-26T08:06:28.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-1.7B-instruct-v2","inference":"library-not-detected","lastModified":"2024-08-23T10:47:17.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":9,"gated":false,"id":"loubnabnl/smollm-360M-instruct-add-basics-w-math","inference":"library-not-detected","lastModified":"2024-08-13T15:47:25.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-135M-instruct-add-basics-w-math","inference":"library-not-detected","lastModified":"2024-08-13T13:28:18.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-350M-instruct-add-basics-eq","inference":"library-not-detected","lastModified":"2024-08-13T00:22:55.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":8,"gated":false,"id":"loubnabnl/smollm-350M-instruct-add-basics-only","inference":"library-not-detected","lastModified":"2024-08-13T00:21:42.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":7,"gated":false,"id":"loubnabnl/SmolLM-360M-4bit","inference":"not-popular-enough","lastModified":"2024-08-12T21:52:46.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":7,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test2-noOH","inference":"library-not-detected","lastModified":"2024-08-12T17:24:45.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test2-ep2","inference":"library-not-detected","lastModified":"2024-08-12T16:39:34.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":11,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test2","inference":"library-not-detected","lastModified":"2024-08-12T16:00:33.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/smollm-350M-instruct-test","inference":"library-not-detected","lastModified":"2024-08-06T15:36:09.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":3,"gated":false,"id":"loubnabnl/outputs","inference":"pipeline-not-detected","lastModified":"2024-02-27T12:35:45.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/starcoder_tokenizer","inference":"library-not-detected","lastModified":"2023-12-22T16:09:31.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":12,"gated":false,"id":"loubnabnl/CodeLlama-7b-hf","inference":"not-popular-enough","lastModified":"2023-08-28T17:07:20.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/codellama-7b","inference":"library-not-detected","lastModified":"2023-08-28T16:50:15.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/model-1b-debug","inference":"library-not-detected","lastModified":"2023-06-22T10:07:12.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/hub_logs","inference":"library-not-detected","lastModified":"2023-06-21T16:38:01.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/starcoder-1b","inference":"not-popular-enough","lastModified":"2023-06-21T10:38:33.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/tokenizer-slimpajama","inference":"library-not-detected","lastModified":"2023-06-19T10:49:12.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/tokenizer-pile","inference":"library-not-detected","lastModified":"2023-06-19T10:34:23.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/starcoder-5b-noconf","inference":"library-not-detected","lastModified":"2023-06-15T12:34:39.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/starcoder-tokenizer","inference":"library-not-detected","lastModified":"2023-05-23T08:50:02.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/megatron-tp2","inference":"library-not-detected","lastModified":"2023-03-21T21:52:56.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/sl-test-1","inference":"library-not-detected","lastModified":"2023-03-11T14:27:46.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":17,"gated":false,"id":"loubnabnl/santacoder-393B-tokens","inference":"custom-code","lastModified":"2023-03-08T14:00:32.000Z","likes":1,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":22,"gated":false,"id":"loubnabnl/santacoder-code-to-text","inference":"custom-code","lastModified":"2023-02-02T10:16:02.000Z","likes":5,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":0,"gated":false,"id":"loubnabnl/santacoder-finetuned-the-stack-bash-3","inference":"library-not-detected","lastModified":"2023-01-22T00:46:34.000Z","likes":0,"private":false,"repoType":"model","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":16,"gated":false,"id":"loubnabnl/rho-loss-baseline-model","inference":"not-popular-enough","lastModified":"2022-12-09T12:45:04.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":17,"gated":false,"id":"loubnabnl/apps-1.5B-model","inference":"not-popular-enough","lastModified":"2022-07-28T15:40:49.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/codeparrot-small-multi-small-near-dedup","inference":"not-popular-enough","lastModified":"2022-07-18T09:20:36.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":14,"gated":false,"id":"loubnabnl/codeparrot-small-near-dedup","inference":"not-popular-enough","lastModified":"2022-06-18T20:59:13.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"downloads":18,"gated":false,"id":"loubnabnl/codeparrot-small-scale","inference":"not-popular-enough","lastModified":"2022-05-15T14:34:14.000Z","likes":0,"pipeline_tag":"text-generation","private":false,"repoType":"model","isLikedByUser":false,"widgetOutputUrls":[]}],"numberLikes":89,"papers":[{"id":"2406.17557","title":"The FineWeb Datasets: Decanting the Web for the Finest Text Data at\n Scale","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2406.17557.png","upvotes":86,"publishedAt":"2024-06-25T13:50:56.000Z","isUpvotedByUser":false},{"id":"2405.18392","title":"Scaling Laws and Compute-Optimal Training Beyond Fixed Training\n Durations","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2405.18392.png","upvotes":12,"publishedAt":"2024-05-28T17:33:54.000Z","isUpvotedByUser":false},{"id":"2402.19173","title":"StarCoder 2 and The Stack v2: The Next Generation","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2402.19173.png","upvotes":136,"publishedAt":"2024-02-29T13:53:35.000Z","isUpvotedByUser":false},{"id":"2305.06161","title":"StarCoder: may the source be with you!","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2305.06161.png","upvotes":30,"publishedAt":"2023-05-09T08:16:42.000Z","isUpvotedByUser":false},{"id":"2303.03915","title":"The BigScience ROOTS Corpus: A 1.6TB Composite Multilingual Dataset","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2303.03915.png","upvotes":6,"publishedAt":"2023-03-07T14:25:44.000Z","isUpvotedByUser":false},{"id":"2301.03988","title":"SantaCoder: don't reach for the stars!","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2301.03988.png","upvotes":7,"publishedAt":"2023-01-09T10:52:35.000Z","isUpvotedByUser":false},{"id":"2211.15533","title":"The 
Stack: 3 TB of permissively licensed source code","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2211.15533.png","upvotes":5,"publishedAt":"2022-11-20T18:15:30.000Z","isUpvotedByUser":false},{"id":"2211.05100","title":"BLOOM: A 176B-Parameter Open-Access Multilingual Language Model","thumbnailUrl":"https://cdn-thumbnails.huggingface.co/social-thumbnails/papers/2211.05100.png","upvotes":28,"publishedAt":"2022-11-09T18:48:09.000Z","isUpvotedByUser":false}],"posts":[{"slug":"547206198374677","content":[{"type":"text","value":"Making SmolLM2 reproducible: open-sourcing our training & evaluation toolkit πŸ› οΈ ","raw":"Making SmolLM2 reproducible: open-sourcing our training & evaluation toolkit πŸ› οΈ "},{"type":"link","href":"https://github.com/huggingface/smollm/","raw":"https://github.com/huggingface/smollm/"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Pre-training code with nanotron","raw":"- Pre-training code with nanotron"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Evaluation suite with lighteval","raw":"- Evaluation suite with lighteval"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Synthetic data generation using distilabel (powers our new SFT dataset ","raw":"- Synthetic data generation using distilabel (powers our new SFT dataset "},{"type":"resource","resource":{"type":"dataset","id":"HuggingFaceTB/smoltalk"},"url":"https://huggingface.co/datasets/HuggingFaceTB/smoltalk","raw":"https://huggingface.co/datasets/HuggingFaceTB/smoltalk"},{"type":"text","value":")","raw":")"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- Post-training scripts with TRL & the alignment handbook","raw":"- Post-training scripts with TRL & the alignment handbook"},{"type":"new_line","raw":"\n"},{"type":"text","value":"- On-device tools with llama.cpp for summarization, rewriting & agents","raw":"- On-device tools with llama.cpp for summarization, rewriting & 
agents"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Apache 2.0 licensed. V2 pre-training data mix coming soon!","raw":"Apache 2.0 licensed. V2 pre-training data mix coming soon!"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Which other tools should we add next?","raw":"Which other tools should we add next?"}],"rawContent":"Making SmolLM2 reproducible: open-sourcing our training & evaluation toolkit πŸ› οΈ https://github.com/huggingface/smollm/\n\n- Pre-training code with nanotron\n- Evaluation suite with lighteval\n- Synthetic data generation using distilabel (powers our new SFT dataset https://huggingface.co/datasets/HuggingFaceTB/smoltalk)\n- Post-training scripts with TRL & the alignment handbook\n- On-device tools with llama.cpp for summarization, rewriting & agents\n\nApache 2.0 licensed. V2 pre-training data mix coming soon!\n\nWhich other tools should we add next?","author":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374,"isFollowing":false},"attachments":[],"mentions":[],"reactions":[{"reaction":"πŸ”₯","users":["reach-vb","John6666","not-lain","AtAndDev","Joseph717171","vansin"],"count":6},{"reaction":"πŸ€—","users":["Joseph717171"],"count":1}],"publishedAt":"2024-11-24T16:00:22.000Z","updatedAt":"2024-11-24T16:01:53.795Z","commentators":[],"url":"/posts/loubnabnl/547206198374677","totalUniqueImpressions":1360,"numComments":0},{"slug":"634384490754714","content":[{"type":"text","value":"🍷 FineWeb technical report is out and so is πŸ“š FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarksΒ such as MMLU, ARC, and OpenBookQA.","raw":"🍷 FineWeb technical 
report is out and so is πŸ“š FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarksΒ such as MMLU, ARC, and OpenBookQA."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Technical report: ","raw":"Technical report: "},{"type":"resource","resource":{"type":"space","id":"HuggingFaceFW/blogpost-fineweb-v1"},"url":"https://hf.co/spaces/HuggingFaceFW/blogpost-fineweb-v1","raw":"https://hf.co/spaces/HuggingFaceFW/blogpost-fineweb-v1"},{"type":"text","value":" ","raw":" "},{"type":"new_line","raw":"\n"},{"type":"text","value":"Dataset: ","raw":"Dataset: "},{"type":"resource","resource":{"type":"dataset","id":"HuggingFaceFW/fineweb-edu"},"url":"https://hf.co/datasets/HuggingFaceFW/fineweb-edu","raw":"https://hf.co/datasets/HuggingFaceFW/fineweb-edu"},{"type":"text","value":" ","raw":" "},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"We used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens. ","raw":"We used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens. 
"},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"You can find more details about the dataset and the experiments we ran in the FineWeb technical report, It's a 45-minute read but it contains all the secret sauce for building high quality web datasets.","raw":"You can find more details about the dataset and the experiments we ran in the FineWeb technical report, It's a 45-minute read but it contains all the secret sauce for building high quality web datasets."},{"type":"new_line","raw":"\n"},{"type":"new_line","raw":"\n"},{"type":"text","value":"Enjoy!","raw":"Enjoy!"}],"rawContent":"🍷 FineWeb technical report is out and so is πŸ“š FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarksΒ such as MMLU, ARC, and OpenBookQA.\n\nTechnical report: https://hf.co/spaces/HuggingFaceFW/blogpost-fineweb-v1 \nDataset: https://hf.co/datasets/HuggingFaceFW/fineweb-edu \n\nWe used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens. 
\n\nYou can find more details about the dataset and the experiments we ran in the FineWeb technical report, It's a 45-minute read but it contains all the secret sauce for building high quality web datasets.\n\nEnjoy!","author":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374,"isFollowing":false},"attachments":[{"type":"image","url":"https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/zlFOO3Gh5zPpJ-vjvSMEs.png"}],"mentions":[],"reactions":[{"reaction":"πŸ”₯","users":["mmhamdy","alielfilali01","GPT007","arjunguha","hiauiarau","nicolay-r","privategeek24","Ariel323","maywell","guipenedo","neuralink","asaduzzaman319"],"count":12},{"reaction":"πŸ‘","users":["dillfrescott","guipenedo","neuralink"],"count":3},{"reaction":"🧠","users":["neuralink","louisbrulenaudet"],"count":2}],"publishedAt":"2024-06-02T08:15:54.000Z","updatedAt":"2024-06-02T08:16:14.466Z","commentators":[],"url":"/posts/loubnabnl/634384490754714","totalUniqueImpressions":5105,"numComments":0}],"totalPosts":4,"spaces":[{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"green","colorTo":"purple","createdAt":"2024-08-15T21:51:03.000Z","emoji":"🀏","id":"loubnabnl/test-smollm","lastModified":"2024-08-16T14:29:44.000Z","likes":1,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNNING","hardware":{"current":"cpu-basic","requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-test-smollm.hf.space","isCustom":false,"stage":"READY"}],"sha":"546fa514c1327e57c03d4f41cfccd8a0be637ccd"},"title":"Smol Playground","isLikedByUser":false,"originSpace":{"name":"HuggingFaceTB/instant-smollm","author":{"_id":"648a374f00f7a3374ee64b99","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/648a374f00f7a3374ee64b99/YPwSOrronoozwHbJchPn3.jpeg","fullname":"Caleb Fahlgren","name":"cfahlgren1","type":"user","isPro":true,"isHf":true,"isMod":false,"followerCount":134}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"indigo","colorTo":"green","createdAt":"2024-05-03T10:28:35.000Z","emoji":"πŸ‘","id":"loubnabnl/inspect_sanchit_annotations","lastModified":"2024-05-03T10:29:05.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-inspect-sanchit-annotations.hf.space","isCustom":false,"stage":"READY"}]},"title":"Inspect Sanchit 
Anotations","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"blue","colorTo":"pink","createdAt":"2024-03-28T21:01:21.000Z","emoji":"🏒","id":"loubnabnl/zero-gpu","lastModified":"2024-04-22T09:47:47.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"zero-a10g"},"storage":null,"gcTimeout":172800,"errorMessage":"Launch timed out, workload was not healthy after 30 min","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-zero-gpu.hf.space","isCustom":false,"stage":"READY"}]},"title":"Zero Gpu","isLikedByUser":false},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"purple","colorTo":"blue","createdAt":"2024-03-07T15:42:22.000Z","emoji":"πŸ•ΈοΈ","id":"loubnabnl/inspect_selfcheck","lastModified":"2024-03-13T15:56:41.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":172800,"errorMessage":"","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-inspect-selfcheck.hf.space","isCustom":false,"stage":"READY"}]},"title":"selfcheck","isLikedByUser":false,"originSpace":{"name":"HuggingFaceTB/inspect_cosmopedia","author":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/e4VK7uW5sTeCYupD0s_ob.png","fullname":"Hugging Face TB Research","name":"HuggingFaceTB","type":"org","isHf":false,"isMod":false,"isEnterprise":true,"followerCount":610}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"gray","colorTo":"gray","createdAt":"2023-09-13T07:53:48.000Z","emoji":"❀️","id":"loubnabnl/Nt3awnou-rescue-map2","lastModified":"2023-09-13T23:38:21.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"errorMessage":"Launch timed out, space was not healthy after 30 min","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-nt3awnou-rescue-map2.hf.space","isCustom":false,"stage":"READY"}]},"title":"Nt3awnu 
Map","isLikedByUser":false,"originSpace":{"name":"nt3awnou/Nt3awnou-rescue-map","author":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5ff8c9f4b2035d9a81a859f7/PCXLFCzqijRtBxma4JdsJ.png","fullname":"Nt3awnou","name":"nt3awnou","type":"org","isHf":false,"isMod":false,"isEnterprise":false,"followerCount":27}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"purple","colorTo":"purple","createdAt":"2023-03-30T13:26:06.000Z","emoji":"πŸ”₯","id":"loubnabnl/diff-visualizer","lastModified":"2023-08-27T11:35:52.000Z","likes":1,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"SLEEPING","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-diff-visualizer.hf.space","isCustom":false,"stage":"READY"}]},"title":"Diff Visualizer","isLikedByUser":false,"originSpace":{"name":"SaulLu/diff-visualizer","author":{"_id":"60741a2e69a66931a0273f0c","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1618938489629-60741a2e69a66931a0273f0c.png","fullname":"Lucile Saulnier","name":"SaulLu","type":"user","isPro":false,"isHf":false,"isMod":false,"followerCount":119}}},{"author":"loubnabnl","authorData":{"_id":"61c141342aac764ce1654e43","avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","fullname":"Loubna Ben 
Allal","name":"loubnabnl","type":"user","isPro":false,"isHf":true,"isMod":false,"followerCount":2374},"colorFrom":"red","colorTo":"blue","createdAt":"2022-12-29T10:43:47.000Z","emoji":"πŸ’¬","id":"loubnabnl/the-stack-bot","lastModified":"2023-02-27T16:46:00.000Z","likes":0,"pinned":false,"private":false,"repoType":"space","runtime":{"stage":"RUNTIME_ERROR","hardware":{"current":null,"requested":"cpu-basic"},"storage":null,"gcTimeout":86400,"errorMessage":"Traceback (most recent call last):\n File \"/home/user/.local/bin/streamlit\", line 5, in \n from streamlit.web.cli import main\n File \"/home/user/.local/lib/python3.8/site-packages/streamlit/__init__.py\", line 55, in \n from streamlit.delta_generator import DeltaGenerator as _DeltaGenerator\n File \"/home/user/.local/lib/python3.8/site-packages/streamlit/delta_generator.py\", line 45, in \n from streamlit.elements.arrow_altair import ArrowAltairMixin\n File \"/home/user/.local/lib/python3.8/site-packages/streamlit/elements/arrow_altair.py\", line 35, in \n from altair.vegalite.v4.api import Chart\nModuleNotFoundError: No module named 'altair.vegalite.v4'\n","replicas":{"current":1,"requested":1},"devMode":false,"domains":[{"domain":"loubnabnl-the-stack-bot.hf.space","isCustom":false,"stage":"READY"}]},"title":"The Stack Bot","isLikedByUser":false}],"u":{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61c141342aac764ce1654e43/81AwoT5IQ_Xdw0OVw7TKu.jpeg","isPro":false,"fullname":"Loubna Ben Allal","user":"loubnabnl","orgs":[{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1583856921041-5dd96eb166059660ed1ee413.png","fullname":"Hugging Face","name":"huggingface","userRole":"write","type":"org","isHf":true},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1634806038075-5df7e9e5da6d0311fd3d53f9.png","fullname":"BigScience 
Workshop","name":"bigscience","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1637946025573-5fbfd09ee366524fe8e97cd3.webp","fullname":"BigScience Catalogue Data","name":"bigscience-catalogue-data","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1642714419598-5fbfd09ee366524fe8e97cd3.webp","fullname":"BigScience Data","name":"bigscience-data","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61140e35d08630d2676c9829/HdUBhsZwa2fJq6pP0pM0H.png","fullname":"HuggingFaceBR4","name":"HuggingFaceBR4","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/6e5f8ee29ce0e7becbbe45fd7ee8bffc?d=retro&size=100","fullname":"Team 8","name":"Team8","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1655756383598-61c141342aac764ce1654e43.png","fullname":"CodeParrot","name":"codeparrot","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1659521200179-5e48005437cb5b49818287a5.png","fullname":"BigCode","name":"bigcode","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/j26aNEdiOgptZxJ6akGCC.png","fullname":"Hugging Face H4","name":"HuggingFaceH4","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1663704750230-5dd96eb166059660ed1ee413.png","fullname":"CompVis Community","name":"compvis-community","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/1663191953132-61c141342aac764ce1654e43.png","fullname":"BigCode 
Data","name":"bigcode-data","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/b7f53b5386493a352a5dd4b01d2e0ade?d=retro&size=100","fullname":"LocalCodeLLMs","name":"local-code-llms","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f0c746619cb630495b814fd/KXY7ApNHoUzmZsDM3Vchx.png","fullname":"Need4Speed","name":"need-for-speed","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5ff5d596f244529b3ec0fb89/d3KMtMG5-XAS2uhbp82Qz.png","fullname":"Code Llama","name":"codellama","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/e4VK7uW5sTeCYupD0s_ob.png","fullname":"Hugging Face TB Research","name":"HuggingFaceTB","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/bd6f32283561374ab3e454f334600863?d=retro&size=100","fullname":"Hugging Face Smol Cluster","name":"HFSmolCluster","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5ff8c9f4b2035d9a81a859f7/PCXLFCzqijRtBxma4JdsJ.png","fullname":"Nt3awnou","name":"nt3awnou","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6032802e1f993496bc14d9e3/3s4vnB6_xPaPPWcZkF6jT.jpeg","fullname":"huggingPartyParis","name":"HuggingPartyParis","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png","fullname":"Qwen","name":"Qwen","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f17f0a0925b9863e28ad517/exBCcWH8McHg1hVQAtvN5.png","fullname":"ZeroGPU 
Explorers","name":"zero-gpu-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/703e31cb0f692d47af3f21b7b73ca944?d=retro&size=100","fullname":"HF AFAIK","name":"afaik-hf","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/1f34d5d65a1686192504327df19cb60b?d=retro&size=100","fullname":"gg-hf","name":"gg-hf","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/4d40163d9b45c74754e0919aab851407?d=retro&size=100","fullname":"Nanotron Research","name":"nanotron","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/61d354424d8b92469b15345c/TSO3ZwxLYEcg7QNJsr3I5.png","fullname":"Women on Hugging Face","name":"WomenonHuggingFace","userRole":"contributor","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/651e96991b97c9f33d26bde6/R0pIOsiMiw3YnfGvtqwna.png","fullname":"Hugging Face SMOL","name":"HuggingFaceSmol","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/62596f9e1c0a084224b93e00/EfmW5LH_nj0FCEZH7wH2p.png","fullname":"HuggingFaceFW","name":"HuggingFaceFW","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/1aa77d2e6163c9a360c2dcb928488af8?d=retro&size=100","fullname":"bigcode nvidia","name":"bigcode-nvidia","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f17f0a0925b9863e28ad517/nxmdd6m86cxu55UZBlQeg.jpeg","fullname":"Social Post Explorers","name":"social-post-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5f17f0a0925b9863e28ad517/V8fnWFEWwXTgCQuIHnPmk.png","fullname":"Dev Mode 
Explorers","name":"dev-mode-explorers","userRole":"read","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/adbb6fae1cbe225aaaa9db237369dbc3?d=retro&size=100","fullname":"Cosmopedia Stories Collab","name":"cosmopedia-stories-collab","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/602e6dee60e3dd96631c906e/DjTUNJIAMsFzrjzm84Dr-.png","fullname":"StarCoder2 Data","name":"starcoder2data","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/b8e6a19d52546ec99766d15d3402212c?d=retro&size=100","fullname":"Data Agents","name":"data-agents","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/60f2fc91b92afccb7c34b8ed/ndW0EQekNd4krxBkQ-S4W.png","fullname":"Argilla Warehouse","name":"argilla-warehouse","userRole":"write","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/fa4ba2b868f91d84f74f7db74133c5c3?d=retro&size=100","fullname":"smol-explorers","name":"smol-explorers","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://www.gravatar.com/avatar/6da47922b9be34c97ecbcba7bb557264?d=retro&size=100","fullname":"swissai-hf-data","name":"swissai-hf-data","userRole":"admin","type":"org","isHf":false},{"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/5e48005437cb5b49818287a5/oges-i7Dd9Fs0FDrh3bEs.png","fullname":"Hugging Face Science","name":"science","userRole":"write","type":"org","isHf":false}],"signup":{"github":"loubnabnl","twitter":"LoubnaBenAllal1","details":"LLMs, ML for code, Synthetic 
data","homepage":"https://loubnabnl.github.io/","bluesky":"loubnanl.bsky.social","linkedin":""},"isHf":true,"isMod":false,"type":"user"},"upvotes":12,"repoFilterModels":{"sortKey":"modified"},"repoFilterDatasets":{"sortKey":"modified"},"repoFilterSpaces":{"sortKey":"modified"},"numFollowers":2374,"numFollowing":57,"isFollowing":false,"isFollower":false,"sampleFollowers":[{"user":"MackinationsAi","fullname":"AiM","type":"user","_id":"662207ab89fc1bc582895ab0","isPro":true,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/noauth/9FdtdYBfHv__g1YcRGXEr.png"},{"user":"Annduril","fullname":"Luis Alfonso Gutierrez","type":"user","_id":"6366b5e96604a4fee8571fec","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/6366b5e96604a4fee8571fec/-3HqRLsfOARNf0-XP61jD.jpeg"},{"user":"luckywu720","fullname":"lucky WU","type":"user","_id":"66014011fe3a7a542cd6ea84","isPro":false,"avatarUrl":"/avatars/06c51a005b87c7e3ec36dc9ea2d7cd51.svg"},{"user":"Profrandom","fullname":"Alexander Koelnberger","type":"user","_id":"671f6ab61925c496300ed8ab","isPro":false,"avatarUrl":"https://cdn-avatars.huggingface.co/v1/production/uploads/no-auth/7gXO_bHTxyJO6GefQ5Id5.png"}],"isWatching":false,"hardwareItems":[],"acceptLanguages":["en","*"]}">

Loubna Ben Allal

loubnabnl

AI & ML interests

LLMs, ML for code, Synthetic data

Recent Activity

Reacted to merve's post with 🔥 3 days ago
Last week we were blessed with open-source models! A recap 💐 https://huggingface.co/collections/merve/nov-29-releases-674ccc255a57baf97b1e2d31 🖼️ Multimodal > At Hugging Face we released SmolVLM, a performant and efficient smol vision language model 💗 > Show Lab released ShowUI-2B: new vision-language-action model to build GUI/web automation agents 🤖 > Rhymes AI has released the base model of Aria: Aria-Base-64K and Aria-Base-8K with their respective context length > ViDoRe team released ColSmolVLM: A new ColPali-like retrieval model based on SmolVLM > Dataset: Llava-CoT-o1-Instruct: new dataset labelled using Llava-CoT multimodal reasoning model📖 > Dataset: LLaVA-CoT-100k dataset used to train Llava-CoT released by creators of Llava-CoT 📕 💬 LLMs > Qwen team released QwQ-32B-Preview, state-of-the-art open-source reasoning model, broke the internet 🔥 > AliBaba has released Marco-o1, a new open-source reasoning model 💥 > NVIDIA released Hymba 1.5B Base and Instruct, the new state-of-the-art SLMs with hybrid architecture (Mamba + transformer) ⏯️ Image/Video Generation > Qwen2VL-Flux: new image generation model based on Qwen2VL image encoder, T5 and Flux for generation > Lightricks released LTX-Video, a new DiT-based video generation model that can generate 24 FPS videos at 768x512 res ⏯️ > Dataset: Image Preferences is a new image generation preference dataset made with DIBT community effort of Argilla 🏷️ Audio > OuteAI released OuteTTS-0.2-500M new multilingual text-to-speech model based on Qwen-2.5-0.5B trained on 5B audio prompt tokens
View all activity

Articles

Organizations

Hugging Face's profile picture BigScience Workshop's profile picture BigScience Catalogue Data's profile picture BigScience Data's profile picture HuggingFaceBR4's profile picture Team 8's profile picture CodeParrot's profile picture BigCode's profile picture Hugging Face H4's profile picture CompVis Community's profile picture BigCode Data's profile picture LocalCodeLLMs's profile picture Need4Speed's profile picture Code Llama's profile picture Hugging Face TB Research's profile picture Hugging Face Smol Cluster's profile picture Nt3awnou's profile picture huggingPartyParis's profile picture Qwen's profile picture ZeroGPU Explorers's profile picture HF AFAIK's profile picture gg-hf's profile picture Nanotron Research's profile picture Women on Hugging Face's profile picture Hugging Face SMOL's profile picture HuggingFaceFW's profile picture bigcode nvidia's profile picture Social Post Explorers's profile picture Dev Mode Explorers's profile picture Cosmopedia Stories Collab's profile picture StarCoder2 Data's profile picture Data Agents's profile picture Argilla Warehouse's profile picture smol-explorers's profile picture swissai-hf-data's profile picture Hugging Face Science's profile picture

Posts 4

view post
Post
1360
Making SmolLM2 reproducible: open-sourcing our training & evaluation toolkit 🛠️ https://github.com/huggingface/smollm/

- Pre-training code with nanotron
- Evaluation suite with lighteval
- Synthetic data generation using distilabel (powers our new SFT dataset HuggingFaceTB/smoltalk)
- Post-training scripts with TRL & the alignment handbook
- On-device tools with llama.cpp for summarization, rewriting & agents

Apache 2.0 licensed. V2 pre-training data mix coming soon!

Which other tools should we add next?
view post
Post
5105
🍷 FineWeb technical report is out and so is πŸ“š FineWeb-Edu, a 1.3 trillion tokens dataset that outperforms all other open web datasets, with remarkable improvements on educational benchmarksΒ such as MMLU, ARC, and OpenBookQA.

Technical report: HuggingFaceFW/blogpost-fineweb-v1
Dataset: HuggingFaceFW/fineweb-edu

We used Llama 3 generations to train an educational quality classifier, filtering the 15 trillion tokens of FineWeb to select only those with high educational value (an approach also used in Llama 3 and Phi-3 training datasets). We're releasing both FineWeb-Edu and the classifier, along with a larger, less heavily filtered version containing 5.4 trillion tokens.

You can find more details about the dataset and the experiments we ran in the FineWeb technical report. It's a 45-minute read but it contains all the secret sauce for building high quality web datasets.

Enjoy!