mirror of
https://github.com/mukunku/ParquetViewer.git
synced 2026-06-21 12:48:09 +00:00
Compare commits
3 commits
v4.0.0-rel
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a9a3548588 |
||
|
|
52cf523f7a |
||
|
|
37b603f765 |
116 changed files with 4645 additions and 1688 deletions
22
.github/ISSUE_TEMPLATE/translation_template.csv
vendored
22
.github/ISSUE_TEMPLATE/translation_template.csv
vendored
|
|
@ -75,13 +75,7 @@
|
|||
"MainForm.resx","userGuideToolStripMenuItem.Text",,"User Guide","Kullanıcı Kılavuzu",""
|
||||
"MetadataViewer.resx","$this.Text",,"Parquet Metadata Viewer","Parquet Metadata Önizleyicisi",""
|
||||
"MetadataViewer.resx","closeButton.Text",,"Close","Kapat",""
|
||||
"MetadataViewer.resx","copyRawThriftMetadataButton.Text",,"Copy Raw Metadata","Ham Metadatayı Kopyala",""
|
||||
"MetadataViewer.resx","copyRawThriftMetadataButton.ToolTip",,"Exports full, raw Thrift metadata to the clipboard.","Ham Thrift metadatasını panoya kopyalar",""
|
||||
"MetadataViewer.resx","loadingTab.Text",,"Loading...","Yükleniyor...",""
|
||||
"QuickPeekForm.resx","$this.Text",,"Quick Peek","Hızlı Önizleme",""
|
||||
"QuickPeekForm.resx","copyToClipboardToolStripMenuItem.Text",,"Copy to clipboard","Panoya kopyala",""
|
||||
"QuickPeekForm.resx","saveImageToFileButton.Text",,"Save as PNG","PNG olarak Kaydet",""
|
||||
"QuickPeekForm.resx","takeMeBackLinkLabel.Text",,"<<< back","<<< geri",""
|
||||
"Errors.resx","CopyAsWhereTooLargeErrorMessage","Shown when the user right-click's on too many cells and selects the Copy as WHERE... option","The selected data is too large. Please select less cells.","Çok fazla veri seçili. Lütfen daha az hücre seçin.",""
|
||||
"Errors.resx","CopyAsWhereTooLargeErrorTitle","Shown when the user right-click's on too many cells and selects the Copy as WHERE... option","Copy to clipboard failed","Panoya kopyalama başarısız oldu",""
|
||||
"Errors.resx","CopyErrorMessageText","Shown in the messagebox for unhandled exceptions to teach users how to copy the error text","(CTRL+C to copy)","(Kopyalamak için CTRL+C)",""
|
||||
|
|
@ -90,6 +84,7 @@
|
|||
"Errors.resx","CopyToClipboardErrorTitle","Shown when copying to the clipboard fails","Copy to clipboard failed","Panoya kopyalama başarısız oldu",""
|
||||
"Errors.resx","DecimalValueTooLargeErrorMessageFormat","Shown when a field contains decimal values outside the supported range for .NET","Field `{0}` with type DECIMAL({1}, {2}) contains values outside ParquetViewer's supported range between DECIMAL({3}, {4}) and DECIMAL({3}, 0)","DECIMAL({1}, {2}) tipli alan `{0}` ParquetViewer'ın desteklediği aralığın dışında bulunuyor: DECIMAL({3}, {4}) ve DECIMAL({3}, 0)",""
|
||||
"Errors.resx","DecimalValueTooLargeErrorTitle","Shown when a field contains decimal values outside the supported range for .NET","Decimal value too large","Decimal değeri çok büyük",""
|
||||
"Errors.resx","DecimalValueUnknownSizeTooLargeErrorMessageFormat","Shown when a field contains decimal values outside the supported range for .NET AND we don't know it's name, scale, or precision","Encountered DECIMAL data outside ParquetViewer's supported range between DECIMAL({3}, {4}) and DECIMAL({3}, 0)","ParquetViewer'ın desteklediği aralığın, DECIMAL({3}, {4}) ve DECIMAL({3}, 0), dışında bir DECIMAL değerine rastlanıldı",""
|
||||
"Errors.resx","ExportFailedErrorTitle","Shown when an exception is encountered during a file export","File export failed","Dosyaya aktarım başarısız oldu",""
|
||||
"Errors.resx","FieldListGenerationError","Generic error shown when the field list in the field selection dialog cannot be rendered for some reason","Something went wrong while generating the field list.","Alan listesi oluşturulurken bir hata oldu.",""
|
||||
"Errors.resx","FileAssociationFailedErrorMessageFormat","Shown when file association fails in the Help page","Something went wrong (Error code: {0}). Try running ParquetViewer as administrator and try again. ","Birşey ters gitti (Hata kodu: {0}). ParquetViewer'ı yönetici olarak çaliştırıp tekrar deneyin.",""
|
||||
|
|
@ -99,6 +94,8 @@
|
|||
"Errors.resx","InvalidDateFormatErrorTitle","Shown in the custom date format dialog when the user tries to save an invalid date","Invalid Date Format","Geçersiz Tarih Formatı",""
|
||||
"Errors.resx","InvalidQueryErrorMessage","Thrown when an invalid query is executed","The query doesn't seem to be valid. Please try again.","Sorgu geçerli gözükmüyor.",""
|
||||
"Errors.resx","InvalidQueryErrorTitle","Shown when the user inputs an invalid filter query and clicks Execute","Invalid Query","Geçersiz Sorgu",""
|
||||
"Errors.resx","ListsWithNullsErrorMessage","Shown when using the Query Editor and a List field in the result contains nulls","Lists with null values are not supported. Relevant rows have been removed.","null içeren Listeler desteklenmemektedir.",""
|
||||
"Errors.resx","ListsWithNullsErrorTitle","Shown when using the Query Editor and a List field in the result contains nulls","Could not read all records","Tüm kayıtlar okunamadı",""
|
||||
"Errors.resx","MalformedFieldErrorMessageFormat","Shown to users when ParquetViewer fails to read a specific field. It should prompt the user to open a bug ticket if they believe the file is valid","{0}
|
||||
|
||||
If you think the file is valid please consider opening an issue in the GitHub repo. See: Help → About","{0}
|
||||
|
|
@ -115,6 +112,8 @@ Eğer dosyanın aslında geçerli olduğunu düşünüyorsanız lutfen projenin
|
|||
"Errors.resx","NoValidParquetFilesFoundErrorMessage","Shown when the user tries to open a folder and all found parquet files are invalid","No valid parquet files found in folder. Invalid parquet files:","Geçerli parquet dosyası bulunamadı. Bulunan geçersiz dosyalar:",""
|
||||
"Errors.resx","OpenFileNoLongerExistsErrorMessageFormat","Shown in the unlikely event where the open file(s) were somehow deleted","The specified file/folder no longer exists: {0}Please try opening a new file or folder.","Belirtilen dosya/klasör artık bulunamıyor: {0}Lütfen başka bir dosya veya klasör açın.",""
|
||||
"Errors.resx","ParquetSchemaReadErrorMessage","Generic error shown when ParquetViewer is unable to read the schema of a parquet file","Could not read parquet schema.","Parquet şeması okunamadı.",""
|
||||
"Errors.resx","QueryExecutionErrorTitle","Shown in the Query Editor when the query fails to run due to an unexpected exception","Error executing query","Sorgu işlenirken bir hata oluştu",""
|
||||
"Errors.resx","RenderResultsErrorTitle","Shown when values can't be converted/shown in the Query Editor page","Error rendering results","Sonuçlar gösterilirken bir hata oluştu",""
|
||||
"Errors.resx","SelectAtLeastOneFieldErrorMessage","Shown when the user tries to close the field selection dialog without selecting any fields","Please select at least one field","En az bir alan seçin",""
|
||||
"Errors.resx","SelectAtLeastOneFieldErrorTitle","Shown when the user tries to close the field selection dialog without selecting any fields","Error","Hata",""
|
||||
"Errors.resx","SomeInvalidParquetFilesFoundErrorMessage","Shown when the user tries to open a folder and some of the found parquet files are invalid","Some files could not be loaded. Invalid Parquet files:","Bazı parquet dosyaları yüklenemedi. Bulunan geçersiz dosyalar:",""
|
||||
|
|
@ -148,6 +147,8 @@ You can always change this setting later from the Help menu.","Anonim kullanım
|
|||
|
||||
Tercihinizi Yardım menüsunden her zaman değiştirebilirsiniz.",""
|
||||
"Strings.resx","AnalyticsConsentPromptTitle","Message box title that is shown when asking the user if we can gather analytics data","Share Anonymous Usage Data?","Anonim Kullanım Verisi Toplansın mı?",""
|
||||
"Strings.resx","ByteArraysNotSupportedErrorMessage","Shown when a byte[] type is in the results in the Query Editor window. As these types are currently not supported by DuckDB.","Unfortunately byte[] types are currently unsupported in query results. Values will be shown as null.","Ne yazık ki byte[] alanlar sorgu sonuçlarında gösterilememektedir. Değerleri null olarak gösterilecektir.",""
|
||||
"Strings.resx","ByteArraysNotSupportedErrorTitle","Shown when a byte[] type is in the results in the Query Editor window. As these types are currently not supported by DuckDB.","Byte arrays not supported","Byte[] tipli sonuçlar desteklenmemektedir",""
|
||||
"Strings.resx","CancelButtonText","Text to be shown on cancel buttons","Cancel","İptal Et",""
|
||||
"Strings.resx","CancelInitiatedLabelText","Text to be shown between the time a user initiates a cancel operation and when it completes.","Cancelling...","İptal ediliyor...",""
|
||||
"Strings.resx","CantGoBackLinkButtonText","Text shown when the quick peek window's link to go back to the source cell is clicked but the source cell no longer exists","can't go back","geri gidilemiyor",""
|
||||
|
|
@ -196,6 +197,7 @@ Bu ayarı Yardim → Hakkinda sayfasında da yapabilirsiniz.",""
|
|||
"Strings.resx","FileExtensionAssociationPromptTitle","Message box title shown when we ask the user if they'd like to associate .parquet files with ParquetViewer","ParquetViewer file association request","ParquetViewer varsayılan uygulama isteği",""
|
||||
"Strings.resx","FileExtensionAssociationSucceededMessage","Shown when file association succeeds AFTER we ask the user if they'd like to associate ParquetViewer with parquet files","Success! ParquetViewer is now your default application for .parquet files.","Uzantı ilişkilendirmesi başarılı oldu! ParquetViewer .parquet dosyaları için varsayılan uygulamanız olarak ayarlanmıştır.",""
|
||||
"Strings.resx","FileExtensionAssociationSucceededTitle","Shown when file association succeeds AFTER we ask the user if they'd like to associate ParquetViewer with parquet files","File association succeeded","Uzantı ilişkilendirmesi başarılı",""
|
||||
"Strings.resx","FrozenColumnText","When the user right-clicks on a column header this is the text shown for freezing the column. Freezing a column makes it so it is always visible no matter how much you scroll horizontally.","Frozen","Dondur",""
|
||||
"Strings.resx","ImageSavedToDiskMessage","Shown when an image is successfully saved to disk from the quick peek form","Image saved to {0}","Görüntü kaydedildi: {0}",""
|
||||
"Strings.resx","ImageSavedToDiskTitle","Shown when an image is successfully saved to disk from the quick peek form","Save complete","Kayıt tamamlandı",""
|
||||
"Strings.resx","IndexingDataLabelText","Text shown while data is being indexed","Indexing","Dizinleniyor",""
|
||||
|
|
@ -209,6 +211,8 @@ Bu ayarı Yardim → Hakkinda sayfasında da yapabilirsiniz.",""
|
|||
"Strings.resx","MetadataSuccessfullyExportedToFileMessageFormat","Shown when raw thrift metadata is successfully exported to a file","Metadata successfully exported to: {0}","Metadata başarıyla kaydedildi: {0}",""
|
||||
"Strings.resx","MetadataSuccessfullyExportedToFileMessageTitle","Shown when raw thrift metadata is successfully exported to a file","Export complete","Kayıt başarılı",""
|
||||
"Strings.resx","PrivacyPolicyLabelText","Title to be shown before the privacy policy","Privacy policy","Gizlilik politikası",""
|
||||
"Strings.resx","QueryFinishedStatusText","Shown in the Query Editor status bar when the query finishes executing","Finished in:","Tamamlandı:",""
|
||||
"Strings.resx","QueryRunningStatusText","Shown in the Query Editor tool when the query is running","Running:","İşleniyor:",""
|
||||
"Strings.resx","QuerySyntaxHelpText","Shown when the user clicks on the Filter Query (?): link button","NULL CHECK:
|
||||
WHERE field_name IS NULL
|
||||
WHERE field_name IS NOT NULL
|
||||
|
|
@ -249,6 +253,7 @@ BIRDEN FAZLA KOŞUL:
|
|||
|
||||
Daha fazla bilgi için: 'Hakkında → Kullanıcı Kılavuzu'",""
|
||||
"Strings.resx","QuerySyntaxHelpTitle","Shown when the user clicks on the Filter Query (?): link button","Filter Query Syntax Examples","Sorgu Filtresi Söz Dizim Örnekleri",""
|
||||
"Strings.resx","QueryZoomStatusTextFormat","Text format used in the Query Editor tool to display the current zoom level","Query Zoom: {0}%","Sorgu Zumu: {0}%",""
|
||||
"Strings.resx","RecordsToBeExportedTitleFormat","Title shown on the save file dialog when the user is exporting data","{0} records will be exported","{0} kayıt kaydedilecektir",""
|
||||
"Strings.resx","SaveImageAsButtonText","Shown on the save image to file button in the quick peek form","Save image as {0}","Resmi {0} olarak kaydet",""
|
||||
"Strings.resx","SaveImageToFileButtonTextFormat","Text template for the button to save an image preview to disk","Save as {0}","{0} olarak Kaydet",""
|
||||
|
|
@ -264,4 +269,7 @@ Onun yerine sonuçlari {2} dosyasına aktarmak ister misiniz?",""
|
|||
"Strings.resx","TooManyFieldsErrorFormat","Shown on the Field Selection Dialog when there are too many fields to filter by","Too many fields: {0}","Desteklenmeyen sayıda alan: {0}",""
|
||||
"Strings.resx","TypeText","Shown in the title bar of quick peek windows for image previews","Type","Format",""
|
||||
"Strings.resx","UnsupportedFieldCountTextFormat","Shown in the field selection dialog to indicate how many fields are unsupported by ParquetViewer","Unsupported: {0}","Desteklenmeyen: {0}",""
|
||||
"Strings.resx","UnsupportedFieldText","Shown in the field selection dialog next to field names to indicate the field is not supported for viewing with ParquetViewer","(Unsupported)","(Desteklenmiyor)",""
|
||||
"QuickPeekForm.resx","$this.Text",,"Quick Peek","Hızlı Önizleme",""
|
||||
"QuickPeekForm.resx","copyToClipboardToolStripMenuItem.Text",,"Copy to clipboard","Panoya kopyala",""
|
||||
"QuickPeekForm.resx","saveImageToFileButton.Text",,"Save as PNG","PNG olarak Kaydet",""
|
||||
"QuickPeekForm.resx","takeMeBackLinkLabel.Text",,"<<< back","<<< geri",""
|
||||
|
|
|
|||
|
20
.github/workflows/build-test-publish.yaml
vendored
20
.github/workflows/build-test-publish.yaml
vendored
|
|
@ -20,14 +20,14 @@ jobs:
|
|||
skip-publish: ${{ steps.check-tag.outputs.exists }}
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github
|
||||
src
|
||||
|
||||
- name: Setup .NET
|
||||
uses: actions/setup-dotnet@v4.0.1
|
||||
uses: actions/setup-dotnet@v5
|
||||
with:
|
||||
dotnet-version: '8.0.x'
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ jobs:
|
|||
run: dotnet test src/ParquetViewer.sln --no-build --logger trx
|
||||
|
||||
- name: Test Report
|
||||
uses: bibipkins/dotnet-test-reporter@v1.4.1
|
||||
uses: bibipkins/dotnet-test-reporter@v1.6.1
|
||||
if: github.repository == 'mukunku/ParquetViewer'
|
||||
with:
|
||||
github-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
|
@ -56,7 +56,7 @@ jobs:
|
|||
release_version: ${{ steps.release-version.outputs.release_version }}
|
||||
should_publish: ${{ steps.should-publish.outputs.should_publish }}
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
sparse-checkout: |
|
||||
src/ParquetViewer/Properties/AssemblyInfo.cs
|
||||
|
|
@ -72,7 +72,7 @@ jobs:
|
|||
Write-Host "Checking version $versionMatch"
|
||||
|
||||
- name: Is there a finalized release already
|
||||
uses: mukunku/release-exists-action@v1.0.0
|
||||
uses: mukunku/release-exists-action@v1.1.0
|
||||
id: check-release
|
||||
with:
|
||||
tag: 'v${{ steps.release-version.outputs.release_version }}'
|
||||
|
|
@ -90,7 +90,7 @@ jobs:
|
|||
PR_NUMBER: ${{ github.event.number }}
|
||||
VERSION_NUMBER: ${{ needs.checkPublish.outputs.release_version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
sparse-checkout: |
|
||||
.github
|
||||
|
|
@ -110,12 +110,12 @@ jobs:
|
|||
|
||||
- name: Build & Publish Regular Release
|
||||
run: |
|
||||
dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release -f net8.0-windows --nologo -o publish -r win-x64 --no-self-contained
|
||||
dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release -f net10.0-windows --nologo -o publish -r win-x64 --no-self-contained
|
||||
Get-Item "./publish/ParquetViewer.exe" | Select-Object Name, Length
|
||||
|
||||
- name: Build & Publish SelfContained Release
|
||||
run: |
|
||||
dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release_SelfContained -f net8.0-windows --nologo -o publish_selfcontained -r win-x64 --self-contained
|
||||
dotnet publish src/ParquetViewer/ParquetViewer.csproj -c Release_SelfContained -f net10.0-windows --nologo -o publish_selfcontained -r win-x64 --self-contained
|
||||
Get-Item "./publish_selfcontained/ParquetViewer.exe" | Select-Object Name, Length
|
||||
|
||||
- name: Prepare executables for upload
|
||||
|
|
@ -125,7 +125,7 @@ jobs:
|
|||
|
||||
- name: Upload unsigned artifact for signing
|
||||
id: upload-unsigned-artifact
|
||||
uses: actions/upload-artifact@v4
|
||||
uses: actions/upload-artifact@v6
|
||||
with:
|
||||
path: |
|
||||
ParquetViewer.exe
|
||||
|
|
@ -138,7 +138,7 @@ jobs:
|
|||
|
||||
# Documentation: https://about.signpath.io/documentation/trusted-build-systems/github
|
||||
- name: Submit signing request to SignPath.io
|
||||
uses: signpath/github-action-submit-signing-request@v1.1
|
||||
uses: signpath/github-action-submit-signing-request@v2
|
||||
with:
|
||||
api-token: '${{ secrets.SIGNPATH_API_TOKEN }}'
|
||||
organization-id: '5ceccea7-c3e7-4165-8c2e-adab8679db20'
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ jobs:
|
|||
- name: Run PowerShell script
|
||||
shell: pwsh
|
||||
run: |
|
||||
# Find all .resx files for Turkish (tr)
|
||||
# Find all .resx files for Turkish (tr) as that is our source of truth
|
||||
$turkishResxFiles = Get-ChildItem -Path . -Recurse -Filter "*.tr.resx"
|
||||
|
||||
# Array to hold all combined translation entries
|
||||
|
|
@ -55,7 +55,7 @@ jobs:
|
|||
Write-Host "Found English file: $englishFilePath"
|
||||
[xml]$englishContent = Get-Content -Path $englishFilePath
|
||||
$englishContent.root.data | ForEach-Object {
|
||||
if (-not $_.HasAttribute('type')) {
|
||||
if ((-not $_.HasAttribute('type')) -and (-not $_.HasAttribute('mimetype'))) {
|
||||
$englishData[$_.name] = $_.value
|
||||
$englishComments[$_.name] = $_.comment
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ Some key features:
|
|||
* Run simple SQL queries on parquet data
|
||||
* Open single or partitioned files
|
||||
* Generate SQL schema from parquet files
|
||||
* Easily preview & export image and audio data
|
||||
|
||||
# Download
|
||||
Releases can be found here: https://github.com/mukunku/ParquetViewer/releases
|
||||
|
|
|
|||
230
src/.editorconfig
Normal file
230
src/.editorconfig
Normal file
|
|
@ -0,0 +1,230 @@
|
|||
root = true
|
||||
|
||||
# C# files
|
||||
[*.cs]
|
||||
|
||||
#### Core EditorConfig Options ####
|
||||
|
||||
# Indentation and spacing
|
||||
indent_size = 4
|
||||
indent_style = space
|
||||
tab_width = 4
|
||||
|
||||
# New line preferences
|
||||
end_of_line = crlf
|
||||
insert_final_newline = false
|
||||
|
||||
#### .NET Coding Conventions ####
|
||||
|
||||
# Organize usings
|
||||
dotnet_separate_import_directive_groups = false
|
||||
dotnet_sort_system_directives_first = false
|
||||
file_header_template = unset
|
||||
|
||||
# this. and Me. preferences
|
||||
dotnet_style_qualification_for_event = false
|
||||
dotnet_style_qualification_for_field = false
|
||||
dotnet_style_qualification_for_method = false
|
||||
dotnet_style_qualification_for_property = false
|
||||
|
||||
# Language keywords vs BCL types preferences
|
||||
dotnet_style_predefined_type_for_locals_parameters_members = true
|
||||
dotnet_style_predefined_type_for_member_access = true
|
||||
|
||||
# Parentheses preferences
|
||||
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity
|
||||
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity
|
||||
dotnet_style_parentheses_in_other_operators = never_if_unnecessary
|
||||
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity
|
||||
|
||||
# Modifier preferences
|
||||
dotnet_style_require_accessibility_modifiers = for_non_interface_members
|
||||
|
||||
# Expression-level preferences
|
||||
dotnet_style_coalesce_expression = true
|
||||
dotnet_style_collection_initializer = true
|
||||
dotnet_style_explicit_tuple_names = true
|
||||
dotnet_style_namespace_match_folder = true
|
||||
dotnet_style_null_propagation = true
|
||||
dotnet_style_object_initializer = true
|
||||
dotnet_style_operator_placement_when_wrapping = beginning_of_line
|
||||
dotnet_style_prefer_auto_properties = true
|
||||
dotnet_style_prefer_collection_expression = when_types_loosely_match
|
||||
dotnet_style_prefer_compound_assignment = true
|
||||
dotnet_style_prefer_conditional_expression_over_assignment = true
|
||||
dotnet_style_prefer_conditional_expression_over_return = true
|
||||
dotnet_style_prefer_foreach_explicit_cast_in_source = when_strongly_typed
|
||||
dotnet_style_prefer_inferred_anonymous_type_member_names = true
|
||||
dotnet_style_prefer_inferred_tuple_names = true
|
||||
dotnet_style_prefer_is_null_check_over_reference_equality_method = true
|
||||
dotnet_style_prefer_simplified_boolean_expressions = true
|
||||
dotnet_style_prefer_simplified_interpolation = true
|
||||
|
||||
# Field preferences
|
||||
dotnet_style_readonly_field = true
|
||||
|
||||
# Parameter preferences
|
||||
dotnet_code_quality_unused_parameters = all:silent
|
||||
|
||||
# Suppression preferences
|
||||
dotnet_remove_unnecessary_suppression_exclusions = none
|
||||
|
||||
# New line preferences
|
||||
dotnet_style_allow_multiple_blank_lines_experimental = true
|
||||
dotnet_style_allow_statement_immediately_after_block_experimental = true
|
||||
|
||||
#### C# Coding Conventions ####
|
||||
|
||||
# var preferences
|
||||
csharp_style_var_elsewhere = false
|
||||
csharp_style_var_for_built_in_types = false
|
||||
csharp_style_var_when_type_is_apparent = false
|
||||
|
||||
# Expression-bodied members
|
||||
csharp_style_expression_bodied_accessors = true
|
||||
csharp_style_expression_bodied_constructors = false
|
||||
csharp_style_expression_bodied_indexers = true
|
||||
csharp_style_expression_bodied_lambdas = true
|
||||
csharp_style_expression_bodied_local_functions = false
|
||||
csharp_style_expression_bodied_methods = false
|
||||
csharp_style_expression_bodied_operators = false
|
||||
csharp_style_expression_bodied_properties = true
|
||||
|
||||
# Pattern matching preferences
|
||||
csharp_style_pattern_matching_over_as_with_null_check = true
|
||||
csharp_style_pattern_matching_over_is_with_cast_check = true
|
||||
csharp_style_prefer_extended_property_pattern = true
|
||||
csharp_style_prefer_not_pattern = true
|
||||
csharp_style_prefer_pattern_matching = true
|
||||
csharp_style_prefer_switch_expression = true
|
||||
|
||||
# Null-checking preferences
|
||||
csharp_style_conditional_delegate_call = true
|
||||
|
||||
# Modifier preferences
|
||||
csharp_prefer_static_local_function = true
|
||||
csharp_preferred_modifier_order = public,private,protected,internal,file,static,extern,new,virtual,abstract,sealed,override,readonly,unsafe,required,volatile,async
|
||||
csharp_style_prefer_readonly_struct = true
|
||||
csharp_style_prefer_readonly_struct_member = true
|
||||
|
||||
# Code-block preferences
|
||||
csharp_prefer_braces = true
|
||||
csharp_prefer_simple_using_statement = true
|
||||
csharp_style_namespace_declarations = block_scoped
|
||||
csharp_style_prefer_method_group_conversion = true
|
||||
csharp_style_prefer_primary_constructors = true
|
||||
csharp_style_prefer_top_level_statements = true
|
||||
|
||||
# Expression-level preferences
|
||||
csharp_prefer_simple_default_expression = true
|
||||
csharp_style_deconstructed_variable_declaration = true
|
||||
csharp_style_implicit_object_creation_when_type_is_apparent = true
|
||||
csharp_style_inlined_variable_declaration = true
|
||||
csharp_style_prefer_index_operator = true
|
||||
csharp_style_prefer_local_over_anonymous_function = true
|
||||
csharp_style_prefer_null_check_over_type_check = true
|
||||
csharp_style_prefer_range_operator = true
|
||||
csharp_style_prefer_tuple_swap = true
|
||||
csharp_style_prefer_utf8_string_literals = true
|
||||
csharp_style_throw_expression = true
|
||||
csharp_style_unused_value_assignment_preference = discard_variable
|
||||
csharp_style_unused_value_expression_statement_preference = discard_variable
|
||||
|
||||
# 'using' directive preferences
|
||||
csharp_using_directive_placement = outside_namespace
|
||||
|
||||
# New line preferences
|
||||
csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true
|
||||
csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true
|
||||
csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true
|
||||
csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true
|
||||
csharp_style_allow_embedded_statements_on_same_line_experimental = true
|
||||
|
||||
#### C# Formatting Rules ####
|
||||
|
||||
# New line preferences
|
||||
csharp_new_line_before_catch = true
|
||||
csharp_new_line_before_else = true
|
||||
csharp_new_line_before_finally = true
|
||||
csharp_new_line_before_members_in_anonymous_types = true
|
||||
csharp_new_line_before_members_in_object_initializers = true
|
||||
csharp_new_line_before_open_brace = all
|
||||
csharp_new_line_between_query_expression_clauses = true
|
||||
|
||||
# Indentation preferences
|
||||
csharp_indent_block_contents = true
|
||||
csharp_indent_braces = false
|
||||
csharp_indent_case_contents = true
|
||||
csharp_indent_case_contents_when_block = true
|
||||
csharp_indent_labels = one_less_than_current
|
||||
csharp_indent_switch_labels = true
|
||||
|
||||
# Space preferences
|
||||
csharp_space_after_cast = false
|
||||
csharp_space_after_colon_in_inheritance_clause = true
|
||||
csharp_space_after_comma = true
|
||||
csharp_space_after_dot = false
|
||||
csharp_space_after_keywords_in_control_flow_statements = true
|
||||
csharp_space_after_semicolon_in_for_statement = true
|
||||
csharp_space_around_binary_operators = before_and_after
|
||||
csharp_space_around_declaration_statements = false
|
||||
csharp_space_before_colon_in_inheritance_clause = true
|
||||
csharp_space_before_comma = false
|
||||
csharp_space_before_dot = false
|
||||
csharp_space_before_open_square_brackets = false
|
||||
csharp_space_before_semicolon_in_for_statement = false
|
||||
csharp_space_between_empty_square_brackets = false
|
||||
csharp_space_between_method_call_empty_parameter_list_parentheses = false
|
||||
csharp_space_between_method_call_name_and_opening_parenthesis = false
|
||||
csharp_space_between_method_call_parameter_list_parentheses = false
|
||||
csharp_space_between_method_declaration_empty_parameter_list_parentheses = false
|
||||
csharp_space_between_method_declaration_name_and_open_parenthesis = false
|
||||
csharp_space_between_method_declaration_parameter_list_parentheses = false
|
||||
csharp_space_between_parentheses = false
|
||||
csharp_space_between_square_brackets = false
|
||||
|
||||
# Wrapping preferences
|
||||
csharp_preserve_single_line_blocks = true
|
||||
csharp_preserve_single_line_statements = true
|
||||
|
||||
#### Naming styles ####
|
||||
|
||||
# Naming rules
|
||||
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.severity = suggestion
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.symbols = interface
|
||||
dotnet_naming_rule.interface_should_be_begins_with_i.style = begins_with_i
|
||||
|
||||
dotnet_naming_rule.types_should_be_pascal_case.severity = suggestion
|
||||
dotnet_naming_rule.types_should_be_pascal_case.symbols = types
|
||||
dotnet_naming_rule.types_should_be_pascal_case.style = pascal_case
|
||||
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.severity = suggestion
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.symbols = non_field_members
|
||||
dotnet_naming_rule.non_field_members_should_be_pascal_case.style = pascal_case
|
||||
|
||||
# Symbol specifications
|
||||
|
||||
dotnet_naming_symbols.interface.applicable_kinds = interface
|
||||
dotnet_naming_symbols.interface.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.interface.required_modifiers =
|
||||
|
||||
dotnet_naming_symbols.types.applicable_kinds = class, struct, interface, enum
|
||||
dotnet_naming_symbols.types.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.types.required_modifiers =
|
||||
|
||||
dotnet_naming_symbols.non_field_members.applicable_kinds = property, event, method
|
||||
dotnet_naming_symbols.non_field_members.applicable_accessibilities = public, internal, private, protected, protected_internal, private_protected
|
||||
dotnet_naming_symbols.non_field_members.required_modifiers =
|
||||
|
||||
# Naming styles
|
||||
|
||||
dotnet_naming_style.pascal_case.required_prefix =
|
||||
dotnet_naming_style.pascal_case.required_suffix =
|
||||
dotnet_naming_style.pascal_case.word_separator =
|
||||
dotnet_naming_style.pascal_case.capitalization = pascal_case
|
||||
|
||||
dotnet_naming_style.begins_with_i.required_prefix = I
|
||||
dotnet_naming_style.begins_with_i.required_suffix =
|
||||
dotnet_naming_style.begins_with_i.word_separator =
|
||||
dotnet_naming_style.begins_with_i.capitalization = pascal_case
|
||||
|
|
@ -5,12 +5,14 @@
|
|||
<ItemGroup>
|
||||
<PackageVersion Include="Apache.Arrow" Version="22.1.0" />
|
||||
<PackageVersion Include="dotnet-file-associator" Version="0.1.4" />
|
||||
<PackageVersion Include="DuckDB.NET.Data.Full" Version="1.4.3" />
|
||||
<PackageVersion Include="FCTB" Version="2.16.24" />
|
||||
<PackageVersion Include="MiniExcel" Version="2.0.0-preview.2" />
|
||||
<PackageVersion Include="MSTest.TestAdapter" Version="4.0.2" />
|
||||
<PackageVersion Include="MSTest.TestFramework" Version="4.0.2" />
|
||||
<PackageVersion Include="NAudio" Version="2.2.1" />
|
||||
<PackageVersion Include="NAudio.WinForms" Version="2.2.1" />
|
||||
<PackageVersion Include="Parquet.Net" Version="5.4.0" />
|
||||
<PackageVersion Include="NAudio" Version="2.3.0" />
|
||||
<PackageVersion Include="NAudio.WinForms" Version="2.3.0" />
|
||||
<PackageVersion Include="Parquet.Net" Version="5.6.0-pre.3" />
|
||||
<PackageVersion Include="Microsoft.NET.Test.Sdk" Version="18.0.1" />
|
||||
<PackageVersion Include="RichardSzalay.MockHttp" Version="7.0.0" />
|
||||
</ItemGroup>
|
||||
|
|
|
|||
43
src/ParquetViewer.Engine.DuckDB/DuckDBHandle.cs
Normal file
43
src/ParquetViewer.Engine.DuckDB/DuckDBHandle.cs
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
using DuckDB.NET.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// Pairs an open in-memory DuckDB connection with the path of the parquet file it was opened for.
/// Instances are created through <see cref="OpenAsync(string)"/>; disposing the handle disposes the connection.
/// </summary>
public class DuckDBHandle : IDisposable
{
    private DuckDBHandle(DuckDBConnection connection, string parquetPath)
    {
        Connection = connection;
        ParquetFilePath = parquetPath;
    }

    /// <summary>The parquet file path that was passed to <see cref="OpenAsync(string)"/>.</summary>
    public string ParquetFilePath { get; }

    /// <summary>The DuckDB connection owned by this handle.</summary>
    public DuckDBConnection Connection { get; }

    /// <summary>
    /// Opens an in-memory DuckDB connection and wraps it together with <paramref name="parquetPath"/>.
    /// </summary>
    /// <param name="parquetPath">Path of an existing parquet file.</param>
    /// <returns>A handle holding the opened connection and the given path.</returns>
    /// <exception cref="FileNotFoundException">
    /// <paramref name="parquetPath"/> is null or does not point to an existing file.
    /// </exception>
    public static async Task<DuckDBHandle> OpenAsync(string parquetPath)
    {
        // File.Exists returns false for a null path, so no separate null check is needed.
        bool fileIsMissing = !File.Exists(parquetPath);
        if (fileIsMissing)
        {
            throw new FileNotFoundException(parquetPath);
        }

        DuckDBConnection openedConnection = new("Data Source=:memory:");
        try
        {
            await openedConnection.OpenAsync();
            DuckDBHandle handle = new(openedConnection, parquetPath);
            return handle;
        }
        catch
        {
            // Ownership never reached a handle, so release the connection here before rethrowing.
            openedConnection.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Disposes the underlying connection. Any exception raised while doing so is ignored.
    /// </summary>
    public void Dispose()
    {
        try
        {
            Connection.Dispose();
        }
        catch
        {
            // Best-effort cleanup: failures while disposing the connection are swallowed on purpose.
        }
    }
}
|
||||
}
|
||||
135
src/ParquetViewer.Engine.DuckDB/DuckDBHelper.cs
Normal file
135
src/ParquetViewer.Engine.DuckDB/DuckDBHelper.cs
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
using DuckDB.NET.Data;
|
||||
using DuckDB.NET.Native;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Numerics;
|
||||
using System.Text;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// Helper queries that extract schema and metadata information about a parquet file through DuckDB.
/// </summary>
internal static class DuckDBHelper
{
    /// <summary>A column as reported by DuckDB together with the CLR type used to represent it.</summary>
    internal record DuckDBField(string Name, DuckDBType DuckDBType, Type Type);

    /// <summary>
    /// Renders <paramref name="value"/> as a single-quoted SQL string literal, doubling any embedded
    /// single quotes so that file paths such as <c>C:\O'Brien\data.parquet</c> don't break the query.
    /// </summary>
    private static string ToSqlLiteral(string value) => $"'{value.Replace("'", "''")}'";

    /// <summary>
    /// Runs <c>DESCRIBE TABLE</c> against the parquet file and returns one <see cref="DuckDBField"/> per reported column.
    /// </summary>
    /// <param name="db">Handle whose connection and file path are used for the query.</param>
    /// <exception cref="ArgumentOutOfRangeException">A column has a type that <see cref="ParseDuckDBType"/> doesn't support.</exception>
    public static async Task<List<DuckDBField>> GetFields(DuckDBHandle db)
    {
        var fields = new List<DuckDBField>();
        using var result = await db.Connection.QueryAsync($"DESCRIBE TABLE {ToSqlLiteral(db.ParquetFilePath)};");
        await foreach (var row in result)
        {
            var columnName = row.GetString(0);
            var columnTypeName = row.GetString(1);
            var (duckDBType, clrType) = ParseDuckDBType(columnTypeName, columnTypeName);

            fields.Add(new(columnName, duckDBType, clrType));
        }
        return fields;
    }

    /// <summary>
    /// Maps a DuckDB type name to its <see cref="DuckDBType"/> and the CLR type used to hold its values.
    /// </summary>
    /// <param name="duckDBTypeName">Type name as reported by DuckDB (e.g. <c>INTEGER</c>, <c>DECIMAL(18,2)</c>, <c>VARCHAR[]</c>).</param>
    /// <param name="columnTypeName">Fallback type name that is consulted when <paramref name="duckDBTypeName"/> is <c>NULL</c>.</param>
    /// <exception cref="ArgumentOutOfRangeException">The type name isn't recognized.</exception>
    public static (DuckDBType DuckDBType, Type Type) ParseDuckDBType(string duckDBTypeName, string? columnTypeName)
    {
        //Sometimes the duckdb type is reported as NULL, in which case we need to fall back to the column type name.
        //Values here seem to match the Parquet format's supported types: https://parquet.apache.org/docs/file-format/types/
        if (duckDBTypeName.Trim('"') == "NULL" && columnTypeName is not null)
        {
            return columnTypeName switch
            {
                "BOOLEAN" => (DuckDBType.Boolean, typeof(bool)),
                "INT32" => (DuckDBType.Integer, typeof(int)),
                "INT64" => (DuckDBType.BigInt, typeof(long)),
                "INT96" => (DuckDBType.HugeInt, typeof(BigInteger)),
                "FLOAT" => (DuckDBType.Float, typeof(float)),
                "DOUBLE" => (DuckDBType.Double, typeof(double)),
                "FIXED_LEN_BYTE_ARRAY" => (DuckDBType.Blob, typeof(ByteArrayValue)),
                "BYTE_ARRAY" => (DuckDBType.Blob, typeof(ByteArrayValue)),
                _ => throw new ArgumentOutOfRangeException(nameof(columnTypeName), $"Unsupported Parquet column type: {columnTypeName}"),
            };
        }

        // This mapping is based on https://duckdb.net/docs/type-mapping.html
        // It handles simple types and parameterized types by checking the start of the string.
        // Type names are identifiers, so compare ordinally rather than with the current culture.
        if (duckDBTypeName.EndsWith("[]", StringComparison.Ordinal)) return (DuckDBType.List, typeof(List<>));

        if (duckDBTypeName.StartsWith("DECIMAL", StringComparison.Ordinal)) return (DuckDBType.Decimal, typeof(decimal));
        if (duckDBTypeName.StartsWith("VARCHAR", StringComparison.Ordinal)) return (DuckDBType.Varchar, typeof(string));
        if (duckDBTypeName.StartsWith("LIST", StringComparison.Ordinal)) return (DuckDBType.List, typeof(List<>));
        if (duckDBTypeName.StartsWith("MAP", StringComparison.Ordinal)) return (DuckDBType.Map, typeof(Dictionary<,>));
        if (duckDBTypeName.StartsWith("STRUCT", StringComparison.Ordinal)) return (DuckDBType.Struct, typeof(ValueTuple));
        if (duckDBTypeName.StartsWith("ENUM", StringComparison.Ordinal)) return (DuckDBType.Enum, typeof(string));
        if (duckDBTypeName.StartsWith("TIMESTAMP", StringComparison.Ordinal)) return (DuckDBType.Timestamp, typeof(DateTime));

        return duckDBTypeName switch
        {
            "BOOLEAN" => (DuckDBType.Boolean, typeof(bool)),
            "TINYINT" => (DuckDBType.TinyInt, typeof(sbyte)),
            "SMALLINT" => (DuckDBType.SmallInt, typeof(short)),
            "INTEGER" => (DuckDBType.Integer, typeof(int)),
            "BIGINT" => (DuckDBType.BigInt, typeof(long)),
            //HUGEINT is a 128-bit integer; it was previously mislabelled as BigInt
            "HUGEINT" => (DuckDBType.HugeInt, typeof(BigInteger)),
            "UTINYINT" => (DuckDBType.UnsignedTinyInt, typeof(byte)),
            "USMALLINT" => (DuckDBType.UnsignedSmallInt, typeof(ushort)),
            "UINTEGER" => (DuckDBType.UnsignedInteger, typeof(uint)),
            "UBIGINT" => (DuckDBType.UnsignedBigInt, typeof(ulong)),
            //NOTE(review): UHUGEINT is reported as HugeInt; confirm whether DuckDBType.UnsignedHugeInt should be used instead
            "UHUGEINT" => (DuckDBType.HugeInt, typeof(BigInteger)),
            "DOUBLE" => (DuckDBType.Double, typeof(double)),
            "FLOAT" or "REAL" => (DuckDBType.Float, typeof(float)),
            "BLOB" => (DuckDBType.Blob, typeof(ByteArrayValue)),
            "DATE" => (DuckDBType.Date, typeof(DateOnly)),
            "TIME" => (DuckDBType.Time, typeof(TimeOnly)),
            "INTERVAL" => (DuckDBType.Interval, typeof(TimeSpan)),
            "UUID" => (DuckDBType.Uuid, typeof(Guid)),
            _ => throw new ArgumentOutOfRangeException(nameof(duckDBTypeName), $"Unsupported DuckDB type: {duckDBTypeName}({columnTypeName})")
        };
    }

    /// <summary>
    /// Reads the rows of <c>parquet_schema</c> and rebuilds them into a tree.
    /// DuckDB flattens the schema: each element is followed by its children, and every element
    /// states how many children it has.
    /// </summary>
    /// <param name="db">Handle whose connection and file path are used for the query.</param>
    /// <returns>The root schema element with all descendants attached.</returns>
    /// <exception cref="InvalidDataException">The schema has no rows, or ends before all declared children were read.</exception>
    public static async Task<ParquetSchemaElement> GetParquetSchemaTreeAsync(DuckDBHandle db)
    {
        //Both the query result and its enumerator are disposed when we're done, including on failure
        using var result = await db.Connection.QueryAsync($"SELECT * FROM parquet_schema({ToSqlLiteral(db.ParquetFilePath)});");
        await using var enumerator = result.GetAsyncEnumerator();

        if (!await enumerator.MoveNextAsync())
        {
            throw new InvalidDataException("Failed to retrieve Parquet schema.");
        }

        var rootNode = ParquetSchemaElement.FromRow(enumerator.Current);
        await ReadChildrenAsync(rootNode, enumerator);
        return rootNode;

        //Consumes exactly parent.NumChildren subtrees from the shared row enumerator
        static async Task ReadChildrenAsync(ParquetSchemaElement parent, IAsyncEnumerator<DuckDBDataReader> rows)
        {
            for (int i = 0; i < parent.NumChildren; i++)
            {
                if (!await rows.MoveNextAsync())
                {
                    throw new InvalidDataException($"Premature end to parquet schema for field `{parent.Path}`.");
                }

                var childNode = ParquetSchemaElement.FromRow(rows.Current);
                parent.Children.Add(childNode);
                await ReadChildrenAsync(childNode, rows);
            }
        }
    }

    /// <summary>
    /// Reads the key/value pairs returned by <c>parquet_kv_metadata</c>, decoding both key and value as UTF-8 text.
    /// </summary>
    /// <param name="db">Handle whose connection and file path are used for the query.</param>
    /// <returns>The metadata entries; if a key occurs more than once the last value wins.</returns>
    public static async Task<Dictionary<string, string>> GetCustomMetadataAsync(DuckDBHandle db)
    {
        var query = $"SELECT * FROM parquet_kv_metadata({ToSqlLiteral(db.ParquetFilePath)});";
        var metadata = new Dictionary<string, string>();
        using var result = await db.Connection.QueryAsync(query);
        await foreach (var row in result)
        {
            //Columns 1 and 2 are read as streams and decoded as UTF-8
            var keyStream = await row.GetFieldValueAsync<Stream>(1);
            var valueStream = await row.GetFieldValueAsync<Stream>(2);

            using var keyReader = new StreamReader(keyStream, Encoding.UTF8);
            string key = await keyReader.ReadToEndAsync();

            using var valueReader = new StreamReader(valueStream, Encoding.UTF8);
            string value = await valueReader.ReadToEndAsync();
            metadata[key] = value;
        }
        return metadata;
    }
}
|
||||
}
|
||||
24
src/ParquetViewer.Engine.DuckDB/ExtensionMethods.cs
Normal file
24
src/ParquetViewer.Engine.DuckDB/ExtensionMethods.cs
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
using DuckDB.NET.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// Extension helpers for <see cref="DuckDBConnection"/>.
/// </summary>
internal static class ExtensionMethods
{
    /// <summary>
    /// Executes <paramref name="sql"/> on the connection, opening the connection first if it is closed,
    /// and wraps the resulting reader in a <see cref="QueryResult"/>.
    /// </summary>
    /// <param name="db">Connection to run the query on.</param>
    /// <param name="sql">SQL text to execute.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public static async Task<QueryResult> QueryAsync(this DuckDBConnection db, string sql)
    {
        ArgumentNullException.ThrowIfNull(db);
        ArgumentNullException.ThrowIfNull(sql);

        bool connectionIsClosed = db.State == System.Data.ConnectionState.Closed;
        if (connectionIsClosed)
        {
            await db.OpenAsync();
        }

        using (var sqlCommand = db.CreateCommand())
        {
            sqlCommand.CommandText = sql;
            return new QueryResult(sqlCommand.ExecuteReader());
        }
    }
}
|
||||
}
|
||||
463
src/ParquetViewer.Engine.DuckDB/ParquetEngine.cs
Normal file
463
src/ParquetViewer.Engine.DuckDB/ParquetEngine.cs
Normal file
|
|
@ -0,0 +1,463 @@
|
|||
using DuckDB.NET.Data;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
using System.Data;
|
||||
using static ParquetViewer.Engine.DuckDB.DuckDBHelper;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
public class ParquetEngine : IParquetEngine
|
||||
{
|
||||
//One handle and one metadata object per opened parquet file
private readonly List<DuckDBHandle> _dbs;
private readonly List<ParquetMetadata> _metadatas;

//Fields exposed by this engine (see FilterOutFieldsThatDontExist)
private readonly List<DuckDBField> _fields;

/// <summary>Path of the opened file or folder.</summary>
public string Path { get; set; }

/// <summary>Total number of records across all opened files.</summary>
public long RecordCount { get; }

/// <summary>Custom key/value metadata supplied when the engine was created.</summary>
public Dictionary<string, string> CustomMetadata { get; }

/// <summary>Names of the available fields. A new list is created on every access.</summary>
public List<string> Fields => this._fields.ConvertAll(field => field.Name);

/// <summary>Number of opened parquet files.</summary>
public int NumberOfPartitions
{
    get { return this._dbs.Count; }
}

/// <summary>Metadata of the first opened file.</summary>
public IParquetMetadata Metadata
{
    get { return this._metadatas.First(); }
}
|
||||
|
||||
/// <summary>
/// Combines the name, CLR type and DuckDB type of every field, in order, into a single hash code.
/// Used to group files by schema.
/// </summary>
private static int GetFieldsHashCode(List<DuckDBField> fields)
{
    var combined = new HashCode();
    for (int index = 0; index < fields.Count; index++)
    {
        var (name, duckDBType, clrType) = fields[index];
        combined.Add(name);
        combined.Add(clrType);
        combined.Add(duckDBType);
    }
    return combined.ToHashCode();
}
|
||||
|
||||
/// <summary>
/// Creates an engine over a single parquet file by delegating to the multi-file constructor
/// with one-element lists.
/// </summary>
private ParquetEngine(string filePath, DuckDBHandle db, ParquetMetadata metadata, List<DuckDBField> fields, long recordCount, Dictionary<string, string> customMetadata)
    : this(filePath, new List<DuckDBHandle> { db }, new List<ParquetMetadata> { metadata }, fields, recordCount, customMetadata)
{
}
|
||||
|
||||
/// <summary>
/// Creates an engine over a set of parquet files that live in the same folder.
/// Fields are filtered against the schema of the first file's metadata.
/// </summary>
private ParquetEngine(string folderPath, List<DuckDBHandle> dbs, List<ParquetMetadata> metadatas, List<DuckDBField> fields, long recordCount, Dictionary<string, string> customMetadata)
{
    this.Path = folderPath;
    this.RecordCount = recordCount;
    this.CustomMetadata = customMetadata;

    this._dbs = dbs;
    this._metadatas = metadatas;

    ParquetMetadata firstFileMetadata = metadatas.First();
    this._fields = FilterOutFieldsThatDontExist(fields, firstFileMetadata);
}
|
||||
|
||||
/// <summary>
/// Keeps only those fields whose name matches the path of a direct child of the schema root.
/// DuckDB's DESCRIBE TABLE query sometimes reports fields that aren't actually in the Parquet file.
/// </summary>
/// <returns>A new list containing the fields that actually exist in the parquet file.</returns>
/// <remarks>Fixes PARTITIONED_PARQUET_FILE_TEST</remarks>
private static List<DuckDBField> FilterOutFieldsThatDontExist(List<DuckDBField> fields, ParquetMetadata metadata)
    => fields
        .Where(candidate => metadata.SchemaTree.Children
            .Cast<IParquetSchemaElement>()
            .Any(schemaChild => schemaChild.Path == candidate.Name))
        .ToList();
|
||||
|
||||
/// <summary>
/// Opens <paramref name="parquetFilePath"/> as a single file if it is a file, or as a folder of
/// parquet files if it is a directory.
/// </summary>
/// <exception cref="FileNotFoundException">The path is neither an existing file nor an existing directory (includes null).</exception>
public static Task<ParquetEngine> OpenFileOrFolderAsync(string parquetFilePath, CancellationToken cancellationToken)
{
    //File.Exists and Directory.Exists both return false for null
    if (File.Exists(parquetFilePath))
    {
        return OpenFileAsync(parquetFilePath, cancellationToken);
    }

    if (Directory.Exists(parquetFilePath))
    {
        return OpenFolderAsync(parquetFilePath, cancellationToken);
    }

    throw new FileNotFoundException(parquetFilePath);
}
|
||||
|
||||
/// <summary>
/// Opens a single parquet file and loads its metadata, fields and custom metadata.
/// </summary>
/// <param name="parquetFilePath">Path to an existing parquet file.</param>
/// <param name="cancellationToken">Currently not consulted by this method.</param>
/// <exception cref="FileNotFoundException">The file does not exist (includes a null path).</exception>
public static async Task<ParquetEngine> OpenFileAsync(string parquetFilePath, CancellationToken cancellationToken)
{
    bool fileIsMissing = !File.Exists(parquetFilePath); //also true for null
    if (fileIsMissing)
    {
        throw new FileNotFoundException($"Could not find parquet file at: {parquetFilePath}");
    }

    DuckDBHandle handle = await DuckDBHandle.OpenAsync(parquetFilePath);
    try
    {
        ParquetMetadata fileMetadata = await ParquetMetadata.FromDuckDBAsync(handle);
        List<DuckDBField> describedFields = await DuckDBHelper.GetFields(handle);
        Dictionary<string, string> keyValueMetadata = await DuckDBHelper.GetCustomMetadataAsync(handle);

        return new ParquetEngine(
            parquetFilePath,
            handle,
            fileMetadata,
            describedFields.ToList(),
            fileMetadata.RowCount,
            keyValueMetadata);
    }
    catch (Exception)
    {
        //The engine never took ownership of the handle, so release it here
        handle.Dispose();
        throw;
    }
}
|
||||
|
||||
/// <summary>
/// Opens every parquet file found in <paramref name="folderPath"/> and exposes them as a single data set.
/// All files must share the same schema and all of them must be readable.
/// </summary>
/// <param name="folderPath">Path to an existing directory.</param>
/// <param name="cancellationToken">Checked before each file is opened and once more before metadata is loaded.</param>
/// <exception cref="DirectoryNotFoundException">The directory doesn't exist (includes a null path).</exception>
/// <exception cref="FileNotFoundException">No parquet files were found.</exception>
/// <exception cref="AllFilesSkippedException">None of the files could be opened.</exception>
/// <exception cref="MultipleSchemasFoundException">The files don't all share one schema.</exception>
/// <exception cref="SomeFilesSkippedException">Some of the files could not be opened.</exception>
/// <remarks>Every handle opened by this method is disposed if the method fails for any reason.</remarks>
public static async Task<ParquetEngine> OpenFolderAsync(string folderPath, CancellationToken cancellationToken)
{
    if (!Directory.Exists(folderPath)) //Handles null
    {
        throw new DirectoryNotFoundException($"Directory doesn't exist: {folderPath}");
    }

    var skippedFiles = new Dictionary<string, Exception>();

    //Files grouped by schema hash. The fields of the first file in each group are kept so we
    //never need to query a handle again just to find out what its schema was.
    var fileGroups = new Dictionary<int, (List<DuckDBField> Fields, List<DuckDBHandle> Handles)>();
    try
    {
        foreach (var file in Helpers.ListParquetFiles(folderPath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            DuckDBHandle? db = null;
            try
            {
                db = await DuckDBHandle.OpenAsync(file);
                var fileFields = await DuckDBHelper.GetFields(db);
                var fieldsHashCode = GetFieldsHashCode(fileFields);
                if (!fileGroups.TryGetValue(fieldsHashCode, out var fileGroup))
                {
                    fileGroup = (fileFields, []);
                    fileGroups.Add(fieldsHashCode, fileGroup);
                }

                fileGroup.Handles.Add(db);
            }
            catch (Exception ex)
            {
                //The handle never made it into a group so nobody else will dispose it
                db?.Dispose();
                skippedFiles.Add(System.IO.Path.GetRelativePath(folderPath, file), ex);
            }
        }

        if (fileGroups.Count == 0)
        {
            if (skippedFiles.Count == 0)
            {
                throw new FileNotFoundException("Directory is empty");
            }
            else
            {
                throw new AllFilesSkippedException(skippedFiles);
            }
        }
        else if (fileGroups.Count > 1)
        {
            //We found more than one type of schema.
            var fieldsByFile = fileGroups.Values
                .Select(fileGroup => fileGroup.Fields.Select(f => f.Name).ToList())
                .ToList();

            throw new MultipleSchemasFoundException(fieldsByFile);
        }
        else if (skippedFiles.Count > 0)
        {
            //We found one schema but some files couldn't be read
            throw new SomeFilesSkippedException(skippedFiles);
        }

        cancellationToken.ThrowIfCancellationRequested();

        //We have only one schema across all files and are good to go
        var (fields, dbs) = fileGroups.Values.First();

        var metadatas = new List<ParquetMetadata>();
        foreach (var db in dbs)
        {
            var metadata = await ParquetMetadata.FromDuckDBAsync(db);
            metadatas.Add(metadata);
        }

        //Sum as long: Enumerable.Sum over int is checked and would throw for very large data sets
        var totalRecordCount = metadatas.Sum(m => (long)m.RowCount);
        var customMetadata = await DuckDBHelper.GetCustomMetadataAsync(dbs.First());

        return new ParquetEngine(folderPath, dbs, metadatas, fields, totalRecordCount, customMetadata);
    }
    catch
    {
        //Release every handle we managed to open; the caller never receives them on failure
        foreach (var fileGroup in fileGroups.Values)
        {
            Helpers.EZDispose(fileGroup.Handles);
        }
        throw;
    }
}
|
||||
|
||||
/// <summary>Disposes every DuckDB handle owned by this engine.</summary>
public void Dispose() => Helpers.EZDispose(this._dbs);
|
||||
|
||||
/// <summary>
/// Streams rows for the requested fields across all opened files, treating them as one continuous data set.
/// </summary>
/// <param name="selectedFields">Column names to select; each is quoted via <c>MakeColumnSafe</c>.</param>
/// <param name="offset">Number of rows to skip, counted from the start of the first file.</param>
/// <param name="recordCount">Maximum number of rows to return in total.</param>
/// <exception cref="FileNotFoundException">One of the files no longer exists when it is about to be read.</exception>
private async IAsyncEnumerable<DuckDBDataReader> QueryDataAsync(List<string> selectedFields, int offset, int recordCount)
{
    var fields = string.Join(", ", selectedFields.Select(MakeColumnSafe));
    foreach ((DuckDBHandle db, ParquetMetadata metadata) in Helpers.PairEnumerables(this._dbs, this._metadatas))
    {
        EnsureFileExists(db.ParquetFilePath);

        if (recordCount <= 0)
            yield break;

        if (offset >= metadata.RowCount)
        {
            //The whole file falls before the requested window, skip it
            offset -= metadata.RowCount;
            continue;
        }

        //Double any single quotes so a path like "O'Brien.parquet" can't terminate the SQL string literal
        var escapedPath = db.ParquetFilePath.Replace("'", "''");
        var query = $"SELECT {fields} " +
            $"FROM read_parquet('{escapedPath}') " +
            $"LIMIT {recordCount} " +
            $"OFFSET {offset};";

        //The offset only applies to the first file we actually read from
        offset = 0;

        using var result = await db.Connection.QueryAsync(query);
        await foreach (var row in result)
        {
            yield return row;
            recordCount--;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Reads up to <paramref name="recordCount"/> rows starting at <paramref name="offset"/> into a <see cref="DataTable"/>,
/// converting nested DuckDB values (lists, structs, maps, byte arrays) into the engine's own value types.
/// </summary>
/// <param name="selectedFields">Names of the fields to load. Names that aren't known to the engine are ignored.</param>
/// <param name="offset">Number of rows to skip. Must not be negative.</param>
/// <param name="recordCount">Maximum number of rows to read. Must be greater than zero.</param>
/// <param name="cancellationToken">Checked for every row and every cell.</param>
/// <param name="progress">Receives the number of cells processed after each row.</param>
/// <returns>
/// A function that returns the loaded table. When called with <c>true</c> it first reports the total cell count to <paramref name="progress"/>.
/// </returns>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="recordCount"/> or <paramref name="offset"/> is out of range.</exception>
/// <exception cref="DecimalOverflowException">A decimal value doesn't fit into <see cref="decimal"/>.</exception>
public async Task<Func<bool, DataTable>> ReadRowsAsync(List<string> selectedFields, int offset, int recordCount, CancellationToken cancellationToken, IProgress<int>? progress = null)
{
    ArgumentOutOfRangeException.ThrowIfNegativeOrZero(recordCount, nameof(recordCount));
    ArgumentOutOfRangeException.ThrowIfNegative(offset, nameof(offset));

    //CreateEmptyDataTable adds columns in the order of this._fields and LoadDataRow is positional,
    //so the query must select the columns in that very same order. Otherwise values end up in the wrong column.
    var orderedFields = this._fields
        .Select(field => field.Name)
        .Where(name => selectedFields.Contains(name))
        .ToList();

    var result = CreateEmptyDataTable(orderedFields);

    //Schema element of each selected column; resolved once when the first row arrives
    ParquetSchemaElement[]? columnSchemas = null;

    result.BeginLoadData();
    await foreach (var row in this.QueryDataAsync(orderedFields, offset, recordCount))
    {
        cancellationToken.ThrowIfCancellationRequested();

        var values = new object[row.FieldCount];
        try
        {
            row.GetValues(values);
        }
        catch (OverflowException ex) when (ex.Message == "Value was either too large or too small for a Decimal.")
        {
            throw new DecimalOverflowException(ex);
        }

        columnSchemas ??= orderedFields
            .Select(name => (ParquetSchemaElement)this._metadatas.First().SchemaTree.Children.First(f => f.Path == name))
            .ToArray();

        //Convert values to our types
        for (var columnIndex = 0; columnIndex < row.FieldCount; columnIndex++)
        {
            cancellationToken.ThrowIfCancellationRequested();
            values[columnIndex] = ConvertValueTypeIfNeeded(values[columnIndex], columnSchemas[columnIndex]);
        }

        //supposedly this is the fastest way to load data into a datatable https://stackoverflow.com/a/17123914/1458738
        result.LoadDataRow(values, false);

        progress?.Report(row.FieldCount);
    }
    result.EndLoadData();

    return (bool shouldLogProgress) =>
    {
        if (shouldLogProgress)
        {
            //We don't have any post-processing. So just report the total.
            progress?.Report(result.Rows.Count * result.Columns.Count);
        }
        return result;
    };

    //Recursively converts a raw DuckDB value into the representation used for the given schema element.
    //Null values and values without a schema element become DBNull.
    object ConvertValueTypeIfNeeded(object? value, ParquetSchemaElement? parquetSchemaElement)
    {
        if (value is null || value == DBNull.Value || parquetSchemaElement is null)
            return DBNull.Value;

        if (parquetSchemaElement.FieldType == FieldTypeId.List)
        {
            var list = (IList)value;
            ParquetSchemaElement? listItemField = null;
            if (parquetSchemaElement.Children.Count > 0)
            {
                var listField = parquetSchemaElement.GetListField();
                if (listField.Children.Count == 0) //Assume 2-tier list variation (fixes: TWO_TIER_TEPEATED_LIST_FIELDS_TEST)
                {
                    listItemField = listField;
                }
                else
                {
                    listItemField = listField.GetListItemField();
                }
            }
            else if (parquetSchemaElement.IsPrimitive) //2-tier list (fixes: TWO_TIER_TEPEATED_LIST_FIELDS_TEST)
            {
                var newestList = new ArrayList(list.Count);
                foreach (var item in list)
                {
                    newestList.Add(item);
                }
                return new ListValue(newestList, parquetSchemaElement.ClrType);
            }

            var newList = new ArrayList(list.Count);
            foreach (var item in list)
            {
                newList.Add(ConvertValueTypeIfNeeded(item, listItemField));
            }

            return new ListValue(newList, listItemField!.ClrType!);
        }
        else if (parquetSchemaElement.FieldType == FieldTypeId.Struct)
        {
            var @struct = (Dictionary<string, object?>)value;
            var dataTable = new DataTableLite(1);

            //First pass: declare one column per struct member
            foreach (var fieldName in @struct.Keys)
            {
                var field = parquetSchemaElement.GetSingleOrByName(fieldName);
                if (field.FieldType == FieldTypeId.List)
                {
                    dataTable.AddColumn(fieldName, typeof(ListValue), field);
                }
                else if (field.FieldType == FieldTypeId.Struct)
                {
                    dataTable.AddColumn(fieldName, typeof(StructValue), field);
                }
                else if (field.FieldType == FieldTypeId.Map)
                {
                    dataTable.AddColumn(fieldName, typeof(MapValue), field);
                }
                else //Primitive
                {
                    dataTable.AddColumn(fieldName, field.ClrType, field);
                }
            }

            //Second pass: fill the single row with the converted member values
            dataTable.NewRow();
            var fieldIndex = 0;
            foreach (var keyValuePair in @struct)
            {
                var field = parquetSchemaElement.GetSingleOrByName(keyValuePair.Key);
                dataTable.Rows[0][fieldIndex] = ConvertValueTypeIfNeeded(keyValuePair.Value ?? DBNull.Value, field);
                fieldIndex++;
            }

            return new StructValue(dataTable.GetRowAt(0));
        }
        else if (parquetSchemaElement.FieldType == FieldTypeId.Map)
        {
            var map = (IDictionary)value;
            var mapField = parquetSchemaElement.GetMapKeyValueField();
            var mapKeyField = mapField.GetMapKeyField();
            var mapValueField = mapField.GetMapValueField();

            var count = Math.Max(map.Keys.Count, map.Values.Count);
            var keys = new ArrayList(count);
            var values = new ArrayList(count);
            foreach ((object? key, object? value) pair in
                Helpers.PairEnumerables(map.Keys.Cast<object?>(), map.Values.Cast<object?>(), DBNull.Value))
            {
                keys.Add(ConvertValueTypeIfNeeded(pair.key, mapKeyField));
                values.Add(ConvertValueTypeIfNeeded(pair.value, mapValueField));
            }

            return new MapValue(keys, mapKeyField.ClrType,
                values, mapValueField.ClrType);
        }
        else if (parquetSchemaElement.FieldType == FieldTypeId.Primitive //2-tier list
            && parquetSchemaElement.RepetitionType == RepetitionTypeId.Repeated)
        {
            var list = (IList)value;

            var newList = new ArrayList(list.Count);
            foreach (var item in list)
            {
                //Keep the items themselves. Running them through ConvertValueTypeIfNeeded without a
                //schema element would turn every single item into DBNull.
                //NOTE(review): items are assumed to need no further conversion here - confirm for byte array lists
                newList.Add(item ?? DBNull.Value);
            }

            return new ListValue(newList, parquetSchemaElement.ClrType);
        }
        else if (parquetSchemaElement.IsByteArrayType)
        {
            //The raw value is a stream; copy it fully into memory
            using var ms = new MemoryStream();
            ((Stream)value).CopyTo(ms);
            return new ByteArrayValue(ms.ToArray());
        }
        else //primitive value
        {
            return value;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Builds a <see cref="DataTable"/> without rows that has one column per selected field.
/// Columns are added in the order of the engine's own field list, not the order of <paramref name="selectedFields"/>.
/// </summary>
/// <param name="selectedFields">Names of the fields that should get a column.</param>
private DataTable CreateEmptyDataTable(List<string> selectedFields)
{
    var table = new DataTable();
    foreach (var field in this._fields.Where(candidate => selectedFields.Contains(candidate.Name)))
    {
        var schemaField = (ParquetSchemaElement)this.Metadata.SchemaTree.GetChild(field.Name);

        //Nested and binary fields use the engine's wrapper types, everything else uses the field's CLR type
        Type columnType = schemaField.FieldType switch
        {
            FieldTypeId.Struct => typeof(StructValue),
            FieldTypeId.List => typeof(ListValue),
            FieldTypeId.Map => typeof(MapValue),
            _ => schemaField.IsByteArrayType ? typeof(ByteArrayValue) : field.Type,
        };

        table.Columns.Add(new DataColumn(field.Name, columnType));
    }
    return table;
}
|
||||
|
||||
/// <summary>
/// Throws if the given parquet file has disappeared since it was opened.
/// </summary>
/// <param name="filePath">Path of the file to check.</param>
/// <exception cref="FileNotFoundException">The file doesn't exist. The message names the missing file.</exception>
private static void EnsureFileExists(string filePath)
{
    if (!File.Exists(filePath))
    {
        //Report the file that is actually missing; for folders the engine's Path is the directory
        throw new FileNotFoundException($"Parquet file no longer exists at: {filePath}");
    }
}
|
||||
|
||||
/// <summary>
/// Turns a column name into a quoted SQL identifier: embedded double quotes are doubled
/// and the whole name is wrapped in double quotes.
/// </summary>
private static string MakeColumnSafe(string columnName)
    => string.Concat("\"", columnName.Replace("\"", "\"\""), "\"");
|
||||
|
||||
/// <summary>
/// Writing parquet files is not implemented by this engine.
/// </summary>
/// <exception cref="NotImplementedException">Always; surfaced through the returned task.</exception>
public async Task WriteDataToParquetFileAsync(DataTable dataTable, string path, CancellationToken cancellationToken,
    IProgress<int> progress, Dictionary<string, string>? customMetadata)
{
    throw new NotImplementedException();
}
|
||||
}
|
||||
}
|
||||
250
src/ParquetViewer.Engine.DuckDB/ParquetMetadata.cs
Normal file
250
src/ParquetViewer.Engine.DuckDB/ParquetMetadata.cs
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// File level parquet metadata (schema, row groups and footer information) as reported by DuckDB.
/// Instances are created through <see cref="FromDuckDBAsync"/>.
/// </summary>
public class ParquetMetadata : IParquetMetadata
{
    public int ParquetVersion { get; }

    public int RowGroupCount { get; }

    public int RowCount { get; }

    public string CreatedBy { get; }

    public ICollection<IRowGroupMetadata> RowGroups { get; }

    public IParquetSchemaElement SchemaTree { get; }

    private ParquetMetadata(IParquetSchemaElement schemaTree, ICollection<IRowGroupMetadata> rowGroups,
        int recordCount, int parquetVersion, string createdBy, int rowGroupCount)
    {
        this.SchemaTree = schemaTree;
        this.RowGroups = rowGroups;
        this.RowCount = recordCount;
        this.ParquetVersion = parquetVersion;
        this.CreatedBy = createdBy;
        this.RowGroupCount = rowGroupCount;
    }

    /// <summary>
    /// Loads the schema tree, the per row group column metadata (<c>parquet_metadata</c>) and
    /// the file metadata (<c>parquet_file_metadata</c>) of the handle's parquet file.
    /// </summary>
    /// <param name="db">Handle whose connection and file path are used for the queries.</param>
    /// <remarks>
    /// Columns are read by ordinal position. Row, value and row group counts are narrowed from
    /// <see cref="long"/> to <see cref="int"/>.
    /// </remarks>
    public static async Task<ParquetMetadata> FromDuckDBAsync(DuckDBHandle db)
    {
        var schemaTree = await DuckDBHelper.GetParquetSchemaTreeAsync(db);

        //Double any single quotes so the path can't terminate the SQL string literal early
        var escapedPath = db.ParquetFilePath.Replace("'", "''");

        #region RowGroups
        //parquet_metadata returns one row per column chunk; the row group level values repeat on every row
        var rowGroupColumns = new List<(RowGroupMetadataResult RowGroup, RowGroupColumnMetadata Column)>();
        using var result = await db.Connection.QueryAsync($"SELECT * FROM parquet_metadata('{escapedPath}');");
        await foreach (var row in result)
        {
            long rowGroupId = row.GetInt64(1);
            long rowGroupNumRows = row.GetInt64(2);
            long rowGroupNumColumns = row.GetInt64(3);
            long rowGroupBytes = row.GetInt64(4);
            long? rowGroupCompressedBytes = row.IsDBNull(28) ? null : row.GetInt64(28);
            long? fileOffset = row.IsDBNull(6) ? null : row.GetInt64(6);
            var rowGroupMetadataResult = new RowGroupMetadataResult(rowGroupId, rowGroupNumRows, rowGroupNumColumns, rowGroupBytes, rowGroupCompressedBytes ?? -1, fileOffset ?? -1);

            long columnId = row.GetInt64(5);
            long numValues = row.GetInt64(7);

            string pathInSchema = row.GetString(8);
            string type = row.GetString(9);

            string? statsMin = row.IsDBNull(10) ? null : row.GetString(10);
            string? statsMax = row.IsDBNull(11) ? null : row.GetString(11);

            long? statsNullCount = row.IsDBNull(12) ? null : row.GetInt64(12);
            long? statsDistinctCount = row.IsDBNull(13) ? null : row.GetInt64(13);

            string? statsMinValue = row.IsDBNull(14) ? null : row.GetString(14);
            string? statsMaxValue = row.IsDBNull(15) ? null : row.GetString(15);

            long? indexPageOffset = row.IsDBNull(18) ? null : row.GetInt64(18);
            long? dictionaryPageOffset = row.IsDBNull(19) ? null : row.GetInt64(19);
            long dataPageOffset = row.GetInt64(20);

            long totalCompressedSize = row.GetInt64(21);
            long totalUncompressedSize = row.GetInt64(22);

            long? bloomFilterOffset = row.IsDBNull(24) ? null : row.GetInt64(24);
            long? bloomFilterLength = row.IsDBNull(25) ? null : row.GetInt64(25);

            bool? minIsExact = row.IsDBNull(26) ? null : row.GetBoolean(26);
            bool? maxIsExact = row.IsDBNull(27) ? null : row.GetBoolean(27);

            var rowGroupColumnMetadata = new RowGroupColumnMetadata(
                (int)columnId,
                pathInSchema,
                type,
                (int)numValues,
                totalUncompressedSize,
                totalCompressedSize,
                dataPageOffset,
                indexPageOffset,
                dictionaryPageOffset,
                new RowGroupColumnStatistics(
                    statsMin,
                    statsMax,
                    statsNullCount,
                    statsDistinctCount,
                    statsMinValue,
                    statsMaxValue,
                    minIsExact,
                    maxIsExact),
                bloomFilterOffset,
                bloomFilterLength);

            rowGroupColumns.Add((rowGroupMetadataResult, rowGroupColumnMetadata));
        }

        List<IRowGroupMetadata> rowGroups = rowGroupColumns
            .GroupBy(rgc => rgc.RowGroup.rowGroupId)
            .Select(group =>
            {
                //A group produced by GroupBy always has at least one entry
                var entries = group.ToList();
                var rowGroup = entries[^1].RowGroup;
                var columnMetadatas = entries.Select(entry => entry.Column).ToList();

                return (IRowGroupMetadata)new RowGroupMetadata(
                    (int)group.Key,
                    (int)rowGroup.rowGroupNumRows,
                    (int)rowGroup.rowGroupNumColumns,
                    entries[0].RowGroup.fileOffset, //offset of the row group's first column chunk
                    rowGroup.rowGroupBytes,
                    columnMetadatas.Sum(cm => cm.TotalCompressedSize ?? 0),
                    columnMetadatas);
            })
            .ToList();
        #endregion

        #region File Metadata
        using var metadataResult = await db.Connection.QueryAsync($"SELECT * FROM parquet_file_metadata('{escapedPath}');");
        var fileMetadata = await metadataResult.GetSingleAsync();
        var createdBy = fileMetadata.IsDBNull(1) ? null : fileMetadata.GetString(1);
        var numRows = fileMetadata.GetInt64(2);
        var numRowGroups = fileMetadata.GetInt64(3);
        var parquetVersion = fileMetadata.GetInt64(4);
        #endregion

        var metadata = new ParquetMetadata(schemaTree, rowGroups, (int)numRows, (int)parquetVersion, createdBy ?? string.Empty, (int)numRowGroups);
        return metadata;
    }

    //Row group level values that parquet_metadata repeats on every column chunk row. -1 stands for "not reported".
    private record RowGroupMetadataResult(long rowGroupId, long rowGroupNumRows, long rowGroupNumColumns, long rowGroupBytes, long rowGroupCompressedBytes, long fileOffset);
}
|
||||
|
||||
/// <summary>
/// Immutable description of a single row group, including the metadata of its columns.
/// </summary>
public class RowGroupMetadata(int ordinal, int rowCount, int columnCount, long fileOffset, long totalByteSize, long totalCompressedSize, List<RowGroupColumnMetadata> columnMetadatas)
    : IRowGroupMetadata
{
    public int Ordinal { get; } = ordinal;
    public int RowCount { get; } = rowCount;
    public int ColumnCount { get; } = columnCount;

    //DuckDB doesn't seem to have info on this
    public ICollection<ISortingColumnMetadata>? SortingColumns { get; } = null;

    //A copy of the supplied list, exposed through the interface type
    public ICollection<IRowGroupColumnMetadata>? Columns { get; } = columnMetadatas.ToList<IRowGroupColumnMetadata>();

    public long FileOffset { get; } = fileOffset;
    public long TotalByteSize { get; } = totalByteSize;
    public long TotalCompressedSize { get; } = totalCompressedSize;
}
|
||||
|
||||
/// <summary>
/// Immutable metadata of one column within a row group. Every value is optional.
/// </summary>
public class RowGroupColumnMetadata(
    int? columnId,
    string? pathInSchema,
    string? type,
    int? numValues,
    long? totalUncompressedSize,
    long? totalCompressedSize,
    long? dataPageOffset,
    long? indexPageOffset,
    long? dictionaryPageOffset,
    RowGroupColumnStatistics? statistics,
    long? bloomFilterOffset,
    long? bloomFilterLength) : IRowGroupColumnMetadata
{
    public int? ColumnId { get; } = columnId;

    public string? PathInSchema { get; } = pathInSchema;

    public string? Type { get; } = type;

    public int? NumValues { get; } = numValues;

    public long? TotalUncompressedSize { get; } = totalUncompressedSize;

    public long? TotalCompressedSize { get; } = totalCompressedSize;

    public long? DataPageOffset { get; } = dataPageOffset;

    public long? IndexPageOffset { get; } = indexPageOffset;

    public long? DictionaryPageOffset { get; } = dictionaryPageOffset;

    public IRowGroupColumnStatistics? Statistics { get; } = statistics;

    public long? BloomFilterOffset { get; } = bloomFilterOffset;

    public long? BloomFilterLength { get; } = bloomFilterLength;
}
|
||||
|
||||
/// <summary>
/// Read-only holder for the statistics values of a column within a row group.
/// All values are optional and are stored exactly as handed to the constructor.
/// </summary>
public class RowGroupColumnStatistics : IRowGroupColumnStatistics
{
    public object? Min { get; }

    public object? Max { get; }

    public long? NullCount { get; }

    public long? DistinctCount { get; }

    public object? MinValue { get; }

    public object? MaxValue { get; }

    public bool? IsMinValueExact { get; }

    public bool? IsMaxValueExact { get; }

    /// <summary>
    /// Creates the statistics holder; each argument is stored unchanged in the matching property.
    /// </summary>
    public RowGroupColumnStatistics(object? min, object? max, long? nullCount, long? distinctCount, object? minValue, object? maxValue, bool? isMinValueExact, bool? isMaxValueExact)
    {
        (this.Min, this.Max) = (min, max);
        (this.NullCount, this.DistinctCount) = (nullCount, distinctCount);
        (this.MinValue, this.MaxValue) = (minValue, maxValue);
        (this.IsMinValueExact, this.IsMaxValueExact) = (isMinValueExact, isMaxValueExact);
    }
}
|
||||
}
|
||||
250
src/ParquetViewer.Engine.DuckDB/ParquetSchemaElement.cs
Normal file
250
src/ParquetViewer.Engine.DuckDB/ParquetSchemaElement.cs
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
using DuckDB.NET.Data;
|
||||
using DuckDB.NET.Native;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// One node of the parquet schema tree, created from a schema metadata row read through DuckDB.
/// A node that carries a CLR type is a primitive; otherwise it is classified as LIST, MAP or
/// STRUCT from its converted type or, failing that, guessed from its shape.
/// </summary>
public class ParquetSchemaElement : IParquetSchemaElement<ParquetSchemaElement>
{
    /// <summary>Name of the child that groups the key and value fields of a MAP.</summary>
    private const string MapKeyValueFieldName = "key_value";

    public string Path { get; }

    /// <summary>Child nodes. Starts empty; the constructor never populates it.</summary>
    public ICollection<ParquetSchemaElement> Children { get; }

    public bool IsPrimitive => this._clrType is not null;

    /// <summary>
    /// The CLR type supplied at construction or, when none was supplied, the wrapper type that
    /// matches <see cref="FieldType"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// No CLR type was supplied and the field is not a LIST, MAP or STRUCT.
    /// </exception>
    public Type ClrType => this._clrType ?? this.FieldType switch
    {
        FieldTypeId.List => typeof(ListValue),
        FieldTypeId.Map => typeof(MapValue),
        FieldTypeId.Struct => typeof(StructValue),
        _ => throw new InvalidOperationException("Cannot determine CLR type for primitive field without ClrType information."),
    };

    /// <summary>
    /// Field category. The converted type decides when it is LIST, MAP or STRUCT; any other
    /// value (including null) falls back to <see cref="GuessFieldType"/>.
    /// </summary>
    public FieldTypeId FieldType => this.ConvertedType switch
    {
        "LIST" => FieldTypeId.List,
        "MAP" => FieldTypeId.Map,
        "STRUCT" => FieldTypeId.Struct,
        _ => GuessFieldType(),
    };

    /// <summary>
    /// DuckDB isn't good with metadata resolution it seems. So we have to guess the field type based on available metadata.
    /// </summary>
    private FieldTypeId GuessFieldType()
    {
        bool isRepeated = this._repetitionType == RepetitionTypeId.Repeated;

        // Leaf nodes: either a CLR type is known or no children were reported.
        if (this._clrType is not null || (this.NumChildren ?? 0) <= 0)
        {
            return isRepeated ? FieldTypeId.List : FieldTypeId.Primitive;
        }

        if (this.NumChildren == 2 && this.HasMapKeyValueField())
        {
            return FieldTypeId.Map;
        }

        if (this.NumChildren == 1 && isRepeated)
        {
            return FieldTypeId.List;
        }

        return FieldTypeId.Struct;
    }

    /// <summary>
    /// Non-throwing equivalent of "would <see cref="GetMapKeyValueField"/> succeed?": true when
    /// there is exactly one child, or when any child is named <c>key_value</c>.
    /// </summary>
    private bool HasMapKeyValueField()
        => this.Children.Count == 1 || this.Children.Any(child => child.Path == MapKeyValueFieldName);

    public RepetitionTypeId? RepetitionType => this._repetitionType;

    public bool IsByteArrayType => _clrType == typeof(ByteArrayValue);

    // Note: builds a new list on every access.
    ICollection<IParquetSchemaElement> IParquetSchemaElement.Children => this.Children.ToList<IParquetSchemaElement>();

    public string? Type => this._underlyingType;

    private readonly string? _underlyingType;
    public int? TypeLength { get; }
    private readonly RepetitionTypeId? _repetitionType;
    public int? NumChildren { get; }
    public string? ConvertedType { get; }
    public int? Scale { get; }
    public int? Precision { get; }
    private readonly string? _fieldId;
    public object? LogicalType { get; }
    private readonly DuckDBType? _duckDbType;
    private readonly Type? _clrType;

    /// <summary>
    /// Creates a schema node with an empty <see cref="Children"/> collection. The 64-bit
    /// <paramref name="numChildren"/>, <paramref name="scale"/> and <paramref name="precision"/>
    /// values are narrowed to <see cref="int"/>.
    /// </summary>
    public ParquetSchemaElement(
        string path,
        string? underlyingType,
        int? typeLength,
        RepetitionTypeId? repetitionType,
        long? numChildren,
        string? convertedType,
        long? scale,
        long? precision,
        string? fieldId,
        string? logicalType,
        DuckDBType? duckDbType,
        Type? ClrType)
    {
        this.Children = new List<ParquetSchemaElement>();
        this.Path = path;
        this._underlyingType = underlyingType;
        this.TypeLength = typeLength;
        this._repetitionType = repetitionType;
        this.NumChildren = (int?)numChildren;
        this.ConvertedType = convertedType;
        this.Scale = (int?)scale;
        this.Precision = (int?)precision;
        this._fieldId = fieldId;
        this.LogicalType = logicalType;
        this._duckDbType = duckDbType;
        this._clrType = ClrType;
    }

    /// <summary>
    /// Builds a schema node from the reader's current row. Columns are read by ordinal:
    /// 1 name, 2 type, 3 type length, 4 repetition type, 5 child count, 6 converted type,
    /// 7 scale, 8 precision, 9 field id, 10 logical type, 11 DuckDB type.
    /// NOTE(review): presumably the column layout of DuckDB's parquet schema query - confirm against the caller.
    /// </summary>
    /// <exception cref="ArgumentOutOfRangeException">The repetition type is not REQUIRED, OPTIONAL or REPEATED.</exception>
    public static ParquetSchemaElement FromRow(DuckDBDataReader row)
    {
        string columnName = row.GetString(1);
        string? columnTypeName = row.IsDBNull(2) ? null : row.GetString(2);
        string? typeLengthString = row.IsDBNull(3) ? null : row.GetString(3);
        string? repetitionTypeName = row.IsDBNull(4) ? null : row.GetString(4);
        long? numChildren = row.IsDBNull(5) ? null : row.GetInt64(5);
        string? convertedType = row.IsDBNull(6) ? null : row.GetString(6);
        long? scale = row.IsDBNull(7) ? null : row.GetInt64(7);
        long? precision = row.IsDBNull(8) ? null : row.GetInt64(8);
        string? fieldId = row.IsDBNull(9) ? null : row.GetString(9);
        string? logicalType = row.IsDBNull(10) ? null : row.GetString(10);
        string? duckDbTypeName = row.IsDBNull(11) ? null : row.GetString(11); //Note: This field isn't returned for complex types like LIST, MAP, STRUCT unfortunately

        // A missing or non-numeric type length simply becomes null.
        int? typeLength = int.TryParse(typeLengthString, out var typeLengthValue) ? typeLengthValue : null;

        DuckDBType? duckDBType = null;
        Type? clrType = null;
        if (duckDbTypeName is not null)
        {
            (duckDBType, clrType) = DuckDBHelper.ParseDuckDBType(duckDbTypeName, columnTypeName);
        }

        RepetitionTypeId? repetitionType = null;
        if (repetitionTypeName is not null)
        {
            repetitionType = repetitionTypeName.ToUpperInvariant() switch
            {
                "REQUIRED" => RepetitionTypeId.Required,
                "OPTIONAL" => RepetitionTypeId.Optional,
                "REPEATED" => RepetitionTypeId.Repeated,
                _ => throw new ArgumentOutOfRangeException(nameof(repetitionTypeName), $"Unsupported repetition type: {repetitionTypeName}")
            };
        }

        var element = new ParquetSchemaElement(
            columnName,
            columnTypeName,
            typeLength,
            repetitionType,
            numChildren,
            convertedType,
            scale,
            precision,
            fieldId,
            logicalType,
            duckDBType,
            clrType);

        return element;
    }

    /// <summary>
    /// Returns the only child when there is exactly one; otherwise the first child whose path
    /// equals <paramref name="name"/>.
    /// </summary>
    /// <exception cref="MalformedFieldException">There are no children, or none matches the name.</exception>
    public ParquetSchemaElement GetSingleOrByName(string name)
    {
        if (this.Children.Count == 0)
        {
            throw new MalformedFieldException($"Field `{Path}` has no children. Expected '{name}'.");
        }

        if (this.Children.Count == 1)
        {
            return this.Children.First();
        }
        else
        {
            return this.Children.FirstOrDefault(c => c.Path == name)
                ?? throw new MalformedFieldException($"Field `{Path}` has no child named '{name}'");
        }
    }

    public ParquetSchemaElement GetListField()
    {
        var field = this.GetSingleOrByName("list");
        return field;
    }

    /// <summary>
    /// Returns the item field of a list node, or this node itself when it has no children.
    /// </summary>
    /// <exception cref="UnsupportedFieldException">The item field could not be resolved.</exception>
    public ParquetSchemaElement GetListItemField()
    {
        try
        {
            if (this.Children.Count == 0)
            {
                //Assume this is a 2-tier list...
                return this;
            }

            var field = this.GetSingleOrByName("item");
            return field;
        }
        catch (Exception ex)
        {
            throw new UnsupportedFieldException($"Cannot load field `{this.Path}`. Invalid List type.", ex);
        }
    }

    public ParquetSchemaElement GetMapKeyValueField()
    {
        var field = this.GetSingleOrByName(MapKeyValueFieldName);
        return field;
    }

    public ParquetSchemaElement GetMapKeyField()
    {
        var field = this.GetChildCI("key");
        return field;
    }

    public ParquetSchemaElement GetMapValueField()
    {
        var field = this.GetChildCI("value");
        return field;
    }

    /// <summary>Case-insensitive child lookup; throws if no child matches.</summary>
    public ParquetSchemaElement GetChildCI(string name) =>
        Children.First((f) => f.Path.Equals(name, StringComparison.InvariantCultureIgnoreCase));

    /// <summary>Exact-match child lookup; throws if no child matches.</summary>
    public ParquetSchemaElement GetChild(string name)
        => Children.First((f) => f.Path.Equals(name));

    IParquetSchemaElement IParquetSchemaElement.GetChildCI(string name)
        => GetChildCI(name);

    IParquetSchemaElement IParquetSchemaElement.GetChild(string name)
        => GetChild(name);

    IParquetSchemaElement IParquetSchemaElement.GetListField()
        => GetListField();

    IParquetSchemaElement IParquetSchemaElement.GetListItemField()
        => GetListItemField();

    IParquetSchemaElement IParquetSchemaElement.GetSingleOrByName(string name)
        => GetSingleOrByName(name);

    IParquetSchemaElement IParquetSchemaElement.GetMapKeyValueField()
        => GetMapKeyValueField();

    IParquetSchemaElement IParquetSchemaElement.GetMapKeyField()
        => GetMapKeyField();

    IParquetSchemaElement IParquetSchemaElement.GetMapValueField()
        => GetMapValueField();
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<PlatformTarget>x64</PlatformTarget>
|
||||
<Configurations>Debug;Release;Release_SelfContained</Configurations>
|
||||
<ProduceReferenceAssembly>False</ProduceReferenceAssembly>
|
||||
<EnforceCodeStyleInBuild>True</EnforceCodeStyleInBuild>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_SelfContained|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="DuckDB.NET.Data.Full" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ParquetViewer.Engine\ParquetViewer.Engine.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
49
src/ParquetViewer.Engine.DuckDB/QueryResult.cs
Normal file
49
src/ParquetViewer.Engine.DuckDB/QueryResult.cs
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
using DuckDB.NET.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.DuckDB
|
||||
{
|
||||
/// <summary>
/// Wraps a <see cref="DuckDBDataReader"/> so its rows can be consumed with <c>await foreach</c>.
/// The same reader instance is yielded for every row, positioned on that row; callers must
/// read the values they need before advancing.
/// </summary>
internal class QueryResult : IAsyncEnumerable<DuckDBDataReader>, IDisposable
{
    private readonly DuckDBDataReader _reader;

    public QueryResult(DuckDBDataReader reader)
    {
        _reader = reader;
    }

    /// <summary>
    /// Disposes the underlying reader. Disposal is best-effort: failures are deliberately
    /// suppressed so that <see cref="Dispose"/> never throws.
    /// </summary>
    public void Dispose()
    {
        try
        {
            // Dispose synchronously. Calling DisposeAsync() here without awaiting it would
            // drop the returned ValueTask and leave any failure unobserved.
            _reader.Dispose();
        }
        catch
        {
            // Intentionally ignored: there is nothing useful a caller can do about it.
        }
    }

    /// <summary>
    /// Advances to the next row and returns the reader positioned on it.
    /// </summary>
    /// <exception cref="InvalidOperationException">The reader has no more rows.</exception>
    public async Task<DuckDBDataReader> GetSingleAsync()
    {
        if (await _reader.ReadAsync())
        {
            return _reader;
        }
        throw new InvalidOperationException("No rows found.");
    }

    /// <summary>
    /// Yields the reader once per remaining row. Cancellation is honored before every row,
    /// including the first one.
    /// </summary>
    public async IAsyncEnumerator<DuckDBDataReader> GetAsyncEnumerator(CancellationToken cancellationToken = default)
    {
        while (await _reader.ReadAsync(cancellationToken))
        {
            cancellationToken.ThrowIfCancellationRequested();

            yield return _reader;
        }
    }
}
|
||||
}
|
||||
60
src/ParquetViewer.Engine.ParquetNET/Helpers.cs
Normal file
60
src/ParquetViewer.Engine.ParquetNET/Helpers.cs
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
/// <summary>
/// Extension helpers that interpret the definition levels of a parquet data column.
/// </summary>
internal static class Helpers
{
    #region Dubious Functions
    //Heuristic logic. It blends https://www.aloneguid.uk/posts/2023/04/parquet-empty-vs-null
    //with a partial understanding of how the dremel algorithm works. It is not expected to work for all cases.

    /// <summary>
    /// True when a definition level exists at <paramref name="index"/> and it is at most
    /// one less than the field's current definition level.
    /// </summary>
    public static bool IsNull(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
    {
        var levels = dataColumn.DefinitionLevels;
        if (levels is null || levels.Length <= index)
        {
            return false;
        }

        return levels[index] <= field.CurrentDefinitionLevel - 1;
    }

    /// <summary>
    /// True when a definition level exists at <paramref name="index"/>, equals the field's
    /// current definition level, and differs from the data field's maximum definition level.
    /// </summary>
    public static bool IsEmpty(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
    {
        var levels = dataColumn.DefinitionLevels;
        if (levels is null || levels.Length <= index)
        {
            return false;
        }

        var level = levels[index];
        return level == field.CurrentDefinitionLevel
            && field.DataField?.MaxDefinitionLevel != level /*Fixes STRUCT_TYPE_TEST*/;
    }
    #endregion

    /// <summary>
    /// Some parquet writers don't write null entries into the data array for empty and null lists.
    /// To keep the data aligned with the repetition/definition levels, a <see cref="DBNull"/>
    /// entry is emitted for every level position that is null or empty. When there are not more
    /// definition levels than data values, the data is returned as-is with nulls mapped to
    /// <see cref="DBNull.Value"/>.
    /// </summary>
    /// <param name="dataColumn">The parquet data column</param>
    /// <param name="field">Schema element used to classify each level as null or empty</param>
    /// <returns>A lazily evaluated sequence of values.</returns>
    public static IEnumerable<object> GetDataWithPaddedNulls(this Parquet.Data.DataColumn dataColumn, ParquetSchemaElement field)
    {
        int levelCount = dataColumn.DefinitionLevels?.Length ?? 0;
        if (levelCount <= dataColumn.Data.Length)
        {
            // Nothing to pad.
            return dataColumn.Data.Cast<object?>().Select(d => d ?? DBNull.Value);
        }

        return PadWithNulls();

        IEnumerable<object> PadWithNulls()
        {
            // Position of the next definition level to be matched with a value.
            var nextLevel = 0;
            foreach (var value in dataColumn.Data)
            {
                // Emit placeholders for all null/empty positions preceding this value.
                while (dataColumn.IsEmpty(nextLevel, field) || dataColumn.IsNull(nextLevel, field))
                {
                    yield return DBNull.Value;
                    nextLevel++;
                }

                yield return value ?? DBNull.Value;
                nextLevel++;
            }

            //Need to handle case where last N rows are null/empty
            while (levelCount > nextLevel)
            {
                yield return DBNull.Value;
                nextLevel++;
            }
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,9 +1,10 @@
|
|||
using ParquetViewer.Engine.Types;
|
||||
using ParquetViewer.Engine.ParquetNET.Types;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
internal class ListValueBuilder
|
||||
public class ListValueBuilder
|
||||
{
|
||||
private int[] _repetitionLevels;
|
||||
private int[] _definitionLevels;
|
||||
|
|
@ -17,7 +18,10 @@ namespace ParquetViewer.Engine
|
|||
ArgumentNullException.ThrowIfNull(data);
|
||||
ArgumentNullException.ThrowIfNull(type);
|
||||
|
||||
_type = type;
|
||||
if (type == typeof(byte[]))
|
||||
_type = typeof(ByteArrayValue);
|
||||
else
|
||||
_type = type;
|
||||
|
||||
//We assume they all have the same length
|
||||
_definitionLevels = definitionLevels;
|
||||
|
|
@ -58,7 +62,11 @@ namespace ParquetViewer.Engine
|
|||
var listValue = ReadListValue(rowRange, numberOfListParents, () =>
|
||||
{
|
||||
//TODO: optimize to avoid skipping all rows every time
|
||||
return _data.Skip(rowRange.Start.Value).Take(rowRange.End.Value - rowRange.Start.Value).ToArray();
|
||||
return _data
|
||||
.Select(data => data is byte[] bytes ? new ByteArrayValue(bytes) : data) //Need to handle byte array type separately
|
||||
.Skip(rowRange.Start.Value)
|
||||
.Take(rowRange.End.Value - rowRange.Start.Value)
|
||||
.ToArray();
|
||||
},
|
||||
(int index) =>
|
||||
{
|
||||
|
|
@ -213,4 +221,4 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,17 +1,20 @@
|
|||
using Parquet;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.ParquetNET.Types;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public partial class ParquetEngine
|
||||
{
|
||||
public static readonly string TotalRecordCountExtendedPropertyKey = "TOTAL_RECORD_COUNT";
|
||||
|
||||
public async Task<Func<bool, DataTable>> ReadRowsAsync(List<string> selectedFields, int offset, int recordCount, CancellationToken cancellationToken, IProgress<int>? progress = null)
|
||||
{
|
||||
ArgumentOutOfRangeException.ThrowIfNegativeOrZero(recordCount, nameof(recordCount));
|
||||
ArgumentOutOfRangeException.ThrowIfNegative(offset, nameof(offset));
|
||||
|
||||
long recordsLeftToRead = recordCount;
|
||||
DataTableLite result = BuildDataTable(null, selectedFields, Math.Min(recordCount, (int)this.RecordCount));
|
||||
|
||||
|
|
@ -30,7 +33,6 @@ namespace ParquetViewer.Engine
|
|||
return (logProgress) =>
|
||||
{
|
||||
var datatable = result.ToDataTable(cancellationToken, logProgress ? progress : null);
|
||||
datatable.ExtendedProperties[TotalRecordCountExtendedPropertyKey] = result.DataSetSize;
|
||||
return datatable;
|
||||
};
|
||||
}
|
||||
|
|
@ -82,28 +84,30 @@ namespace ParquetViewer.Engine
|
|||
{
|
||||
cancellationToken.ThrowIfCancellationRequested();
|
||||
|
||||
var field = column.ParentSchema.GetChild(column.Name);
|
||||
switch (field.FieldType)
|
||||
var field = column.ParentSchema.Children.FirstOrDefault(c => c.Path == column.Name) as ParquetSchemaElement;
|
||||
switch (field?.FieldType)
|
||||
{
|
||||
case ParquetSchemaElement.FieldTypeId.Primitive:
|
||||
case FieldTypeId.Primitive:
|
||||
await ReadPrimitiveField(dataTable, groupReader, rowBeginIndex, field, skipRecords,
|
||||
readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
case ParquetSchemaElement.FieldTypeId.List:
|
||||
case FieldTypeId.List:
|
||||
var listField = field.GetListField();
|
||||
var itemField = listField.GetListItemField();
|
||||
var fieldIndex = dataTable.Columns[field.Path]!.Ordinal;
|
||||
await ReadListField(dataTable, groupReader, rowBeginIndex, itemField, fieldIndex,
|
||||
skipRecords, readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
case ParquetSchemaElement.FieldTypeId.Map:
|
||||
case FieldTypeId.Map:
|
||||
await ReadMapField(dataTable, groupReader, rowBeginIndex, field, skipRecords,
|
||||
readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
case ParquetSchemaElement.FieldTypeId.Struct:
|
||||
case FieldTypeId.Struct:
|
||||
await ReadStructField(dataTable, groupReader, rowBeginIndex, field, skipRecords,
|
||||
readRecords, isFirstColumn, cancellationToken, progress);
|
||||
break;
|
||||
default:
|
||||
throw new InvalidDataException($"`{column.Name}`");
|
||||
}
|
||||
|
||||
isFirstColumn = false;
|
||||
|
|
@ -151,7 +155,7 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
else if (fieldType == typeof(ByteArrayValue))
|
||||
{
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new ByteArrayValue(field.Path, (byte[])value);
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new ByteArrayValue((byte[])value);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -170,7 +174,7 @@ namespace ParquetViewer.Engine
|
|||
var lastMilestone = "Start";
|
||||
try
|
||||
{
|
||||
if (itemField.FieldType == ParquetSchemaElement.FieldTypeId.List)
|
||||
if (itemField.FieldType == FieldTypeId.List)
|
||||
{
|
||||
var nestedListField = itemField.GetListField();
|
||||
var nestedItemField = nestedListField.GetListItemField();
|
||||
|
|
@ -179,7 +183,7 @@ namespace ParquetViewer.Engine
|
|||
await ReadListField(dataTable, groupReader, rowBeginIndex, nestedItemField, fieldIndex: 0,
|
||||
skipRecords, readRecords, isFirstColumn, cancellationToken, progress);
|
||||
}
|
||||
else if (itemField.FieldType == ParquetSchemaElement.FieldTypeId.Primitive)
|
||||
else if (itemField.FieldType == FieldTypeId.Primitive)
|
||||
{
|
||||
int rowIndex = rowBeginIndex;
|
||||
|
||||
|
|
@ -212,7 +216,7 @@ namespace ParquetViewer.Engine
|
|||
progress?.Report(1);
|
||||
}
|
||||
}
|
||||
else if (itemField.FieldType == ParquetSchemaElement.FieldTypeId.Struct)
|
||||
else if (itemField.FieldType == FieldTypeId.Struct)
|
||||
{
|
||||
//Read struct data as a new datatable
|
||||
DataTableLite structFieldTable = BuildDataTable(itemField, itemField.Children.Select(f => f.Path).ToList(), (int)readRecords);
|
||||
|
|
@ -243,13 +247,24 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
|
||||
var columnValues = (ListValue)valueArray[columnOrdinal];
|
||||
for (var rowValueIndex = 0; rowValueIndex < columnValues.Length; rowValueIndex++)
|
||||
|
||||
if (columnValues.Data.Count == 0 && columnOrdinal != 0) //All values are null
|
||||
{
|
||||
for (var i = 0; i < newStructFieldTable.Rows.Count; i++)
|
||||
{
|
||||
newStructFieldTable.Rows[i][columnOrdinal] = DBNull.Value;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
for (var rowValueIndex = 0; rowValueIndex < columnValues.Data.Count; rowValueIndex++)
|
||||
{
|
||||
lastMilestone = $"#{rowIndex}-{columnOrdinal}-{rowValueIndex}";
|
||||
|
||||
var columnValue = columnValues.Data[rowValueIndex] ?? throw new SystemException("Column value missing during pivot");
|
||||
#region Hack for LIST_OF_STRUCT_OF_LIST_OF_STRUCT test
|
||||
if (columnValue is StructValue structValue && structValue.IsList)
|
||||
if (columnValue is StructValueExt structValue && structValue.IsList)
|
||||
{
|
||||
//We need to convert `columnValue` from struct to a list of structs as it was a nested structure
|
||||
var areTypesAsExpected = newStructFieldTable.Columns.Values.ElementAt(columnOrdinal).Type == typeof(ListValue);
|
||||
|
|
@ -258,14 +273,19 @@ namespace ParquetViewer.Engine
|
|||
throw new UnsupportedFieldException("Failed to pivot list of structs.");
|
||||
}
|
||||
|
||||
var nestedStructFieldTable = PivotTable(structValue.Data.Row, structValue.Data.Table.Clone());
|
||||
if (structValue.Data is not DataRowLite dataRowLite)
|
||||
{
|
||||
throw new InvalidDataException("Struct data wasn't the expected type.");
|
||||
}
|
||||
|
||||
var nestedStructFieldTable = PivotTable(structValue.Data.Row, dataRowLite.Table.Clone());
|
||||
var listValues = new ArrayList(nestedStructFieldTable.Rows.Count);
|
||||
for (var i = 0; i < nestedStructFieldTable.Rows.Count; i++)
|
||||
{
|
||||
var row = nestedStructFieldTable.GetRowAt(i);
|
||||
listValues.Add(new StructValue(itemField.Path, row));
|
||||
listValues.Add(new StructValueExt(row));
|
||||
}
|
||||
columnValue = new ListValue(listValues, typeof(StructValue));
|
||||
columnValue = new ListValue(listValues, typeof(StructValueExt));
|
||||
}
|
||||
#endregion
|
||||
|
||||
|
|
@ -296,7 +316,7 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
else
|
||||
{
|
||||
listValues.Add(new StructValue(itemField.Path, dataRow) { IsList = itemField.NumberOfListParents > 1 });
|
||||
listValues.Add(new StructValueExt(dataRow) { IsList = itemField.NumberOfListParents > 1 });
|
||||
}
|
||||
}
|
||||
return listValues;
|
||||
|
|
@ -307,7 +327,7 @@ namespace ParquetViewer.Engine
|
|||
if (isFirstColumn)
|
||||
dataTable.NewRow();
|
||||
|
||||
dataTable.Rows[rowIndex][fieldIndex] = new ListValue(listValues, typeof(StructValue));
|
||||
dataTable.Rows[rowIndex][fieldIndex] = new ListValue(listValues, typeof(StructValueExt));
|
||||
rowIndex++;
|
||||
}
|
||||
}
|
||||
|
|
@ -342,7 +362,7 @@ namespace ParquetViewer.Engine
|
|||
var keyDataEnumerable = keyDataColumn.GetDataWithPaddedNulls(keyField);
|
||||
var valueDataEnumerable = valueDataColumn.GetDataWithPaddedNulls(valueField);
|
||||
|
||||
var dataEnumerable = Helpers.PairEnumerables(keyDataEnumerable, valueDataEnumerable, DBNull.Value);
|
||||
var dataEnumerable = Engine.Helpers.PairEnumerables(keyDataEnumerable, valueDataEnumerable, DBNull.Value);
|
||||
|
||||
var levelCount = Math.Max(keyDataColumn.RepetitionLevels?.Length ?? 0, valueDataColumn.RepetitionLevels?.Length ?? 0);
|
||||
var fieldIndex = dataTable.Columns[field.Path]!.Ordinal;
|
||||
|
|
@ -447,7 +467,7 @@ namespace ParquetViewer.Engine
|
|||
else
|
||||
{
|
||||
var dataRow = structFieldTable.GetRowAt(i);
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new StructValue(field.Path, dataRow);
|
||||
dataTable.Rows[rowIndex]![fieldIndex] = new StructValueExt(dataRow);
|
||||
}
|
||||
rowIndex++;
|
||||
}
|
||||
|
|
@ -477,23 +497,23 @@ namespace ParquetViewer.Engine
|
|||
|
||||
private DataTableLite BuildDataTable(ParquetSchemaElement? parent, List<string> fields, int expectedRecordCount)
|
||||
{
|
||||
parent ??= this.ParquetSchemaTree;
|
||||
parent ??= (ParquetSchemaElement)this.Metadata.SchemaTree;
|
||||
DataTableLite dataTable = new(expectedRecordCount);
|
||||
foreach (var field in fields)
|
||||
{
|
||||
var schema = parent.GetChild(field);
|
||||
if (schema.FieldType == ParquetSchemaElement.FieldTypeId.List
|
||||
if (schema.FieldType == FieldTypeId.List
|
||||
|| schema.DataField?.IsArray == true)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(ListValue), parent);
|
||||
}
|
||||
else if (schema.FieldType == ParquetSchemaElement.FieldTypeId.Map)
|
||||
else if (schema.FieldType == FieldTypeId.Map)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(MapValue), parent);
|
||||
}
|
||||
else if (schema.FieldType == ParquetSchemaElement.FieldTypeId.Struct)
|
||||
else if (schema.FieldType == FieldTypeId.Struct)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(StructValue), parent);
|
||||
dataTable.AddColumn(field, typeof(StructValueExt), parent);
|
||||
}
|
||||
else if (schema.SchemaElement.Type == Parquet.Meta.Type.BYTE_ARRAY
|
||||
&& schema.SchemaElement.LogicalType is null
|
||||
|
|
@ -501,9 +521,20 @@ namespace ParquetViewer.Engine
|
|||
{
|
||||
dataTable.AddColumn(field, typeof(ByteArrayValue), parent);
|
||||
}
|
||||
else if (schema.DataField is DateTimeDataField dateField)
|
||||
{
|
||||
if (dateField.DateTimeFormat == DateTimeFormat.Date)
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(DateOnly), parent);
|
||||
}
|
||||
else
|
||||
{
|
||||
dataTable.AddColumn(field, typeof(DateTime), parent);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
var clrType = schema.DataField?.ClrType ?? throw new MalformedFieldException($"`{(parent is not null ? parent + "/" : string.Empty)}/{field}` has no data field");
|
||||
var clrType = schema.ClrType ?? throw new MalformedFieldException($"`{(parent is not null ? parent + "/" : string.Empty)}/{field}` has no data field");
|
||||
dataTable.AddColumn(field, clrType, parent);
|
||||
}
|
||||
}
|
||||
|
|
@ -535,4 +566,4 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
283
src/ParquetViewer.Engine.ParquetNET/ParquetEngine.cs
Normal file
283
src/ParquetViewer.Engine.ParquetNET/ParquetEngine.cs
Normal file
|
|
@ -0,0 +1,283 @@
|
|||
using Parquet;
|
||||
using Parquet.Meta;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public partial class ParquetEngine : IParquetEngine, IDisposable
|
||||
{
|
||||
/// <summary>Options passed to every <see cref="ParquetReader"/> created by this engine.</summary>
private static readonly ParquetOptions _defaultParquetOptions = new ParquetOptions
{
    UseDateOnlyTypeForDates = true,
    UseTimeOnlyTypeForTimeMicros = true,
    UseTimeOnlyTypeForTimeMillis = true,
};

/// <summary>The readers this engine reads from, as handed to the constructor.</summary>
private readonly ParquetReader[] _parquetFiles;

/// <summary>Cache for <see cref="RecordCount"/>; null until first computed.</summary>
private long? _recordCount;

/// <summary>The first reader; it supplies the schema and metadata for the whole engine.</summary>
private ParquetReader _defaultReader
{
    get { return _parquetFiles.FirstOrDefault() ?? throw new ParquetEngineException("No parquet readers available"); }
}

private FileMetaData _thriftMetadata
{
    get { return _defaultReader.Metadata ?? throw new ParquetEngineException("No thrift metadata was found"); }
}

private ParquetSchema _schema
{
    get { return _defaultReader.Schema; }
}

public Dictionary<string, string> CustomMetadata
{
    get { return _defaultReader.CustomMetadata; }
}

/// <summary>Sum of the row counts of all readers (a reader without metadata counts as 0). Computed once.</summary>
public long RecordCount
{
    get
    {
        if (_recordCount is null)
        {
            _recordCount = _parquetFiles.Sum(pf => pf.Metadata?.NumRows ?? 0);
        }

        return _recordCount.Value;
    }
}

public int NumberOfPartitions
{
    get { return _parquetFiles.Length; }
}

/// <summary>Names of the schema fields of the first reader. A new list is built on every access.</summary>
public List<string> Fields
{
    get { return _defaultReader.Schema.Fields.Select(f => f.Name).ToList(); }
}

public string Path { get; }

/// <summary>Cache for <see cref="Metadata"/>; null until first requested.</summary>
ParquetMetadata? _metadata;

/// <summary>Metadata wrapper, built on first access and reused afterwards.</summary>
public IParquetMetadata Metadata
{
    get
    {
        if (_metadata is null)
        {
            _metadata = new ParquetMetadata(_thriftMetadata, BuildParquetSchemaTree(), (int)RecordCount);
        }

        return _metadata;
    }
}
|
||||
|
||||
/// <summary>
/// Creates an engine over already-opened readers. Private: instances are created through
/// the static <c>Open...Async</c> methods.
/// </summary>
/// <param name="fileOrFolderPath">Path the readers were opened from; exposed as <see cref="Path"/>.</param>
/// <param name="parquetFiles">The readers to read from.</param>
/// <exception cref="ArgumentNullException"><paramref name="parquetFiles"/> is null.</exception>
private ParquetEngine(string fileOrFolderPath, params ParquetReader[] parquetFiles)
{
    if (parquetFiles is null)
    {
        throw new ArgumentNullException(nameof(parquetFiles), "No parquet readers provided");
    }

    _parquetFiles = parquetFiles;
    Path = fileOrFolderPath;
}
|
||||
|
||||
/// <summary>
/// Builds the schema tree from the thrift schema elements, then attaches each data field of
/// the reader's schema to the tree node found by walking the data field's path.
/// </summary>
/// <exception cref="ParquetException">The thrift metadata has no schema.</exception>
/// <exception cref="MalformedFieldException">A data field has no first path part.</exception>
private ParquetSchemaElement BuildParquetSchemaTree()
{
    var thriftSchema = _thriftMetadata.Schema;
    if (thriftSchema is null)
    {
        throw new ParquetException("No thrift metadata was found");
    }

    var cursor = thriftSchema.GetEnumerator();
    var root = ReadSchemaTree(ref cursor);

    foreach (var dataField in _schema.GetDataFields())
    {
        var path = dataField.Path;
        var node = root.GetChild(path.FirstPart ?? throw new MalformedFieldException($"Field has no schema path: `{dataField.Name}`"));

        // Descend through the remaining path parts.
        var depth = 1;
        while (depth < path.Length)
        {
            node = node.GetChild(path[depth]);
            depth++;
        }

        node.DataField = dataField; //if it doesn't have a child it's a datafield (I hope)
    }

    return root;
}
|
||||
|
||||
/// <summary>
/// Recursively reads one schema node and its descendants from the enumerator. The enumerator
/// is passed by reference so that nested calls continue where the previous one stopped.
/// </summary>
/// <exception cref="ParquetException">The enumerator ran out of elements.</exception>
private static ParquetSchemaElement ReadSchemaTree(ref List<SchemaElement>.Enumerator schemaElements)
{
    bool hasElement = schemaElements.MoveNext();
    if (!hasElement)
    {
        throw new ParquetException("Invalid parquet schema");
    }

    var thriftElement = schemaElements.Current;
    var node = new ParquetSchemaElement(thriftElement);

    // A null child count compares false, so such a node reads no children.
    var childIndex = 0;
    while (childIndex < thriftElement.NumChildren)
    {
        node.AddChild(ReadSchemaTree(ref schemaElements));
        childIndex++;
    }

    return node;
}
|
||||
|
||||
/// <summary>
/// Opens the given path as a single parquet file when it is an existing file, or as a folder
/// of parquet files when it is an existing directory.
/// </summary>
/// <exception cref="FileNotFoundException">
/// The path is neither an existing file nor an existing directory. Thrown synchronously, not through the returned task.
/// </exception>
public static Task<ParquetEngine> OpenFileOrFolderAsync(string fileOrFolderPath, CancellationToken cancellationToken)
{
    // File.Exists / Directory.Exists return false for null, so no separate null check is needed.
    if (File.Exists(fileOrFolderPath))
    {
        return OpenFileAsync(fileOrFolderPath, cancellationToken);
    }

    if (Directory.Exists(fileOrFolderPath))
    {
        return OpenFolderAsync(fileOrFolderPath, cancellationToken);
    }

    throw new FileNotFoundException($"Could not find file or folder at location: {fileOrFolderPath}");
}
|
||||
|
||||
/// <summary>
/// Opens a single parquet file with the engine's default reader options.
/// </summary>
/// <exception cref="FileNotFoundException">The file does not exist (also covers a null path).</exception>
/// <exception cref="FileReadException">Any failure while creating the reader or the engine; the original exception is wrapped.</exception>
public static async Task<ParquetEngine> OpenFileAsync(string parquetFilePath, CancellationToken cancellationToken)
{
    bool fileExists = File.Exists(parquetFilePath); //Handles null
    if (!fileExists)
    {
        throw new FileNotFoundException($"Could not find parquet file at: {parquetFilePath}");
    }

    try
    {
        return new ParquetEngine(
            parquetFilePath,
            await ParquetReader.CreateAsync(parquetFilePath, _defaultParquetOptions, cancellationToken));
    }
    catch (Exception readFailure)
    {
        throw new FileReadException(readFailure);
    }
}
|
||||
|
||||
/// <summary>
/// Opens every parquet file found in a folder and combines them into a single engine.
/// All readable files must share one schema and no file may fail to open.
/// </summary>
/// <param name="folderPath">Folder to scan for parquet files.</param>
/// <param name="cancellationToken">Checked before each file and once more before returning.</param>
/// <exception cref="DirectoryNotFoundException">The folder does not exist (also covers a null path).</exception>
/// <exception cref="FileNotFoundException">The folder contains no parquet files.</exception>
/// <exception cref="AllFilesSkippedException">Every file failed to open.</exception>
/// <exception cref="MultipleSchemasFoundException">The files do not all share the same schema.</exception>
/// <exception cref="SomeFilesSkippedException">One schema was found but some files failed to open.</exception>
/// <exception cref="OperationCanceledException">The token was cancelled; readers opened so far are disposed.</exception>
public static async Task<ParquetEngine> OpenFolderAsync(string folderPath, CancellationToken cancellationToken)
{
    if (!Directory.Exists(folderPath)) //Handles null
    {
        throw new DirectoryNotFoundException($"Directory doesn't exist: {folderPath}");
    }

    var skippedFiles = new Dictionary<string, Exception>();
    var fileGroups = new Dictionary<ParquetSchema, List<ParquetReader>>();

    // Releases every reader opened so far; used on all paths that do not hand the readers to an engine.
    void DisposeOpenedReaders()
    {
        foreach (var openedReaders in fileGroups.Values)
        {
            Engine.Helpers.EZDispose(openedReaders);
        }
    }

    try
    {
        foreach (var file in Engine.Helpers.ListParquetFiles(folderPath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                var parquetReader = await ParquetReader.CreateAsync(file, _defaultParquetOptions, cancellationToken);
                if (!fileGroups.TryGetValue(parquetReader.Schema, out var group))
                {
                    group = new List<ParquetReader>();
                    fileGroups.Add(parquetReader.Schema, group);
                }

                group.Add(parquetReader);
            }
            // A cancellation requested through our token is not a "bad file": let it propagate
            // instead of recording the file as skipped.
            catch (Exception ex) when (ex is not OperationCanceledException || !cancellationToken.IsCancellationRequested)
            {
                skippedFiles.Add(System.IO.Path.GetRelativePath(folderPath, file), ex);
            }
        }
    }
    catch
    {
        // Cancellation or an unexpected failure while scanning: don't leak the readers opened so far.
        DisposeOpenedReaders();
        throw;
    }

    if (fileGroups.Keys.Count == 0)
    {
        if (skippedFiles.Count == 0)
        {
            throw new FileNotFoundException("Directory is empty");
        }
        else
        {
            throw new AllFilesSkippedException(skippedFiles);
        }
    }
    else if (fileGroups.Keys.Count > 1)
    {
        //We found more than one type of schema.
        DisposeOpenedReaders();

        throw new MultipleSchemasFoundException(fileGroups.Keys.ToList()
            .Select(schema => schema.Fields.Select(f => f.Name).ToList()).ToList());
    }
    else if (skippedFiles.Count > 0)
    {
        //We found one schema but some files couldn't be read
        DisposeOpenedReaders();
        throw new SomeFilesSkippedException(skippedFiles);
    }

    if (cancellationToken.IsCancellationRequested)
    {
        DisposeOpenedReaders();
        cancellationToken.ThrowIfCancellationRequested();
    }

    return new ParquetEngine(folderPath, fileGroups.Values.First().ToArray());
}
|
||||
|
||||
/// <summary>
/// Lazily yields the readers that remain after skipping <paramref name="offset"/> rows,
/// walking <c>_parquetFiles</c> in order.
/// </summary>
/// <param name="offset">Number of rows to skip, counted across all files.</param>
/// <returns>
/// Each yielded reader together with the number of rows still to be skipped inside it.
/// That number is non-zero for at most the first yielded reader and zero afterwards.
/// </returns>
private IEnumerable<(long RemainingOffset, ParquetReader ParquetReader)> GetReaders(long offset)
{
    var rowsLeftToSkip = offset;
    foreach (var reader in _parquetFiles)
    {
        // A reader without metadata is never skipped; it is yielded as-is.
        var rowCount = reader.Metadata?.NumRows;
        var fileIsEntirelySkipped = rowCount.HasValue && rowsLeftToSkip >= rowCount.Value;
        if (fileIsEntirelySkipped)
        {
            rowsLeftToSkip -= rowCount!.Value;
            continue;
        }

        yield return (rowsLeftToSkip, reader);
        rowsLeftToSkip = 0;
    }
}
|
||||
|
||||
/// <summary>
/// Writes <paramref name="dataTable"/> to a parquet file at <paramref name="path"/>, using the
/// fields of this engine's schema whose names match the table's columns.
/// </summary>
/// <param name="dataTable">Rows and columns to write. Every column name must exist in the engine's schema.</param>
/// <param name="path">Destination file. An existing file is replaced.</param>
/// <param name="cancellationToken">
/// When cancellation is requested the method stops writing and returns without throwing,
/// unless the writer itself observes the token.
/// </param>
/// <param name="progress">Receives the number of values written after each column of each batch.</param>
/// <param name="customMetadata">Optional key/value metadata stored with the file.</param>
/// <exception cref="InvalidOperationException">A table column has no field of the same name in the schema.</exception>
public async Task WriteDataToParquetFileAsync(DataTable dataTable, string path,
    CancellationToken cancellationToken, IProgress<int> progress, Dictionary<string, string>? customMetadata)
{
    var fields = new List<Field>(dataTable.Columns.Count);
    foreach (DataColumn column in dataTable.Columns)
    {
        fields.Add(this._schema.Fields
            .First(field => field.Name.Equals(column.ColumnName, StringComparison.InvariantCulture)));
    }
    var parquetSchema = new ParquetSchema(fields);

    // FileMode.Create truncates an existing file. With OpenOrCreate a previously longer file
    // would keep its old tail after the newly written content.
    using var fs = new FileStream(path, FileMode.Create);
    using var parquetWriter = await ParquetWriter.CreateAsync(parquetSchema, fs, cancellationToken: cancellationToken);
    parquetWriter.CompressionLevel = System.IO.Compression.CompressionLevel.Optimal;
    if (customMetadata is not null)
    {
        parquetWriter.CustomMetadata = customMetadata;
    }

    const int MAX_ROWS_PER_ROWGROUP = 100_000; //Without batching we sometimes get "OverflowException: Array dimensions exceeded supported range" from Parquet.NET
    var totalRowCount = dataTable.Rows.Count;
    var batchIndex = 0;

    // do/while: an empty table still produces one (empty) row group. The loop bound is derived
    // from the row count, so it terminates even when the schema has no data fields, and it does
    // not append an extra empty row group when the row count is an exact multiple of the batch size.
    do
    {
        if (cancellationToken.IsCancellationRequested)
        {
            break;
        }

        using var rowGroup = parquetWriter.CreateRowGroup();
        foreach (var dataField in parquetSchema.DataFields)
        {
            if (cancellationToken.IsCancellationRequested)
            {
                break;
            }

            var type = dataField.IsNullable ? GetNullableVersion(dataField.ClrType) : dataField.ClrType;
            var values = GetColumnValues(dataTable, type, dataField.Name, batchIndex * MAX_ROWS_PER_ROWGROUP, MAX_ROWS_PER_ROWGROUP);
            var dataColumn = new Parquet.Data.DataColumn(dataField, values);
            await rowGroup.WriteColumnAsync(dataColumn, cancellationToken);
            progress.Report(values.Length); //No way to report progress for each row, so do it by column
        }

        batchIndex++;
    }
    while ((long)batchIndex * MAX_ROWS_PER_ROWGROUP < totalRowCount);
}
|
||||
|
||||
/// <summary>
/// Disposes the readers held in <c>_parquetFiles</c>.
/// </summary>
public void Dispose()
{
    Engine.Helpers.EZDispose(_parquetFiles);
}
|
||||
|
||||
/// <summary>
/// Returns a type that can hold null for <paramref name="sourceType"/>.
/// </summary>
/// <param name="sourceType">The type to convert.</param>
/// <returns>
/// <paramref name="sourceType"/> itself when it is not a value type or is already a
/// <see cref="Nullable{T}"/>; otherwise <c>Nullable&lt;sourceType&gt;</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="sourceType"/> is null.</exception>
private static System.Type GetNullableVersion(System.Type sourceType)
{
    if (sourceType == null)
    {
        throw new ArgumentNullException(nameof(sourceType));
    }

    // Non-value types need no wrapping.
    if (!sourceType.IsValueType)
    {
        return sourceType;
    }

    var isAlreadyNullable = sourceType.IsGenericType
        && sourceType.GetGenericTypeDefinition() == typeof(Nullable<>);

    return isAlreadyNullable
        ? sourceType
        : typeof(Nullable<>).MakeGenericType(sourceType);
}
|
||||
|
||||
/// <summary>
/// Copies a window of one column's values out of <paramref name="dataTable"/> into a typed array.
/// </summary>
/// <param name="dataTable">Table to read from.</param>
/// <param name="type">Element type of the returned array.</param>
/// <param name="columnName">Name of the column to read.</param>
/// <param name="skipCount">Number of leading rows to skip; must not be negative.</param>
/// <param name="fetchCount">Maximum number of rows to read; must be greater than zero.</param>
/// <returns>
/// An array of <paramref name="type"/> with one element per row read. It is shorter than
/// <paramref name="fetchCount"/> when fewer rows remain after skipping, and empty when
/// <paramref name="skipCount"/> is at or beyond the end of the table.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="dataTable"/> or <paramref name="type"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="skipCount"/> or <paramref name="fetchCount"/> is out of range.</exception>
/// <exception cref="ArgumentException">The column does not exist in the table.</exception>
/// <exception cref="NotSupportedException">A value is a list, map or struct.</exception>
private static Array GetColumnValues(DataTable dataTable, System.Type type, string columnName, int skipCount, int fetchCount)
{
    ArgumentNullException.ThrowIfNull(dataTable);
    ArgumentNullException.ThrowIfNull(type);
    ArgumentOutOfRangeException.ThrowIfLessThan(skipCount, 0);
    ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(fetchCount, 0);

    if (!dataTable.Columns.Contains(columnName))
    {
        throw new ArgumentException($"Column `{columnName}` does not exist in the datatable");
    }

    // Clamp at zero so skipping past the end yields an empty array instead of a
    // negative length being passed to Array.CreateInstance.
    var recordCountAfterSkip = Math.Max(0, dataTable.Rows.Count - skipCount);
    var recordCountToRead = Math.Min(fetchCount, recordCountAfterSkip);
    var values = Array.CreateInstance(type, recordCountToRead);

    // Index straight into the requested window rather than enumerating (and discarding)
    // every skipped row again on each call.
    for (var index = 0; index < recordCountToRead; index++)
    {
        var value = dataTable.Rows[skipCount + index][columnName];
        if (value == DBNull.Value)
        {
            value = null;
        }
        else if (value is IByteArrayValue byteArray)
        {
            value = byteArray.Data;
        }
        else if (value is IListValue || value is IMapValue || value is IStructValue)
        {
            throw new NotSupportedException("List, Map, and Struct types are currently not supported.");
        }

        values.SetValue(value, index);
    }

    return values;
}
|
||||
}
|
||||
}
|
||||
305
src/ParquetViewer.Engine.ParquetNET/ParquetMetadata.cs
Normal file
305
src/ParquetViewer.Engine.ParquetNET/ParquetMetadata.cs
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
using Parquet.Meta;
|
||||
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
/// <summary>
/// Snapshot of a file's thrift metadata exposed through <see cref="IParquetMetadata"/>.
/// </summary>
public class ParquetMetadata : IParquetMetadata
{
    public int ParquetVersion { get; }

    public int RowGroupCount { get; }

    public string CreatedBy { get; }

    public ICollection<IRowGroupMetadata> RowGroups { get; }

    public IParquetSchemaElement SchemaTree { get; }

    public int RowCount { get; }

    /// <summary>
    /// Copies the relevant parts of <paramref name="thriftMetadata"/> into this instance.
    /// </summary>
    /// <param name="thriftMetadata">Thrift file metadata to read from.</param>
    /// <param name="schemaTree">Root of the schema tree; used to find the field behind each column's statistics.</param>
    /// <param name="recordCount">Value exposed as <see cref="RowCount"/>.</param>
    public ParquetMetadata(FileMetaData thriftMetadata, ParquetSchemaElement schemaTree, int recordCount)
    {
        RowCount = recordCount;
        RowGroupCount = thriftMetadata.RowGroups.Count;
        ParquetVersion = thriftMetadata.Version;
        CreatedBy = thriftMetadata.CreatedBy ?? string.Empty;
        SchemaTree = schemaTree;

        var rowGroups = new List<IRowGroupMetadata>();
        foreach (var (rowGroup, rowGroupIndex) in thriftMetadata.RowGroups.Select((rg, i) => (rg, i)))
        {
            var columns = new List<IRowGroupColumnMetadata>();
            foreach (var (column, columnIndex) in rowGroup.Columns.Select((c, i) => (c, i)))
            {
                // Columns without metadata are left out, but they still consume an index.
                var columnMeta = column.MetaData;
                if (columnMeta is null)
                {
                    continue;
                }

                var field = TryFindField(schemaTree, columnMeta.PathInSchema);

                RowGroupColumnStatistics? statistics = null;
                var thriftStatistics = columnMeta.Statistics;
                if (thriftStatistics is not null)
                {
                    statistics = new RowGroupColumnStatistics(
                        thriftStatistics.Min,
                        thriftStatistics.Max,
                        thriftStatistics.NullCount,
                        thriftStatistics.DistinctCount,
                        thriftStatistics.MinValue,
                        thriftStatistics.MaxValue,
                        thriftStatistics.IsMinValueExact,
                        thriftStatistics.IsMaxValueExact,
                        field);
                }

                columns.Add(new RowGroupColumnMetadata(
                    columnIndex,
                    string.Join("/", columnMeta.PathInSchema),
                    columnMeta.Type.ToString(),
                    (int)columnMeta.NumValues,
                    columnMeta.TotalUncompressedSize,
                    columnMeta.TotalCompressedSize,
                    columnMeta.DataPageOffset,
                    columnMeta.IndexPageOffset,
                    columnMeta.DictionaryPageOffset,
                    statistics,
                    columnMeta.BloomFilterOffset,
                    columnMeta.BloomFilterLength));
            }

            var sortingColumns = rowGroup.SortingColumns?
                .Select(sc => (ISortingColumnMetadata)new SortingColumnMetadata(sc.ColumnIdx, sc.Descending, sc.NullsFirst))
                .ToList();

            // Fall back to the row group's position in the list when it carries no ordinal of its own.
            var ordinal = rowGroup.Ordinal.HasValue ? (int)rowGroup.Ordinal.Value : rowGroupIndex;

            rowGroups.Add(new RowGroupMetadata(
                ordinal,
                (int)rowGroup.NumRows,
                rowGroup.Columns.Count,
                sortingColumns,
                columns,
                rowGroup.FileOffset ?? 0,
                rowGroup.TotalByteSize,
                rowGroup.TotalCompressedSize ?? 0));
        }

        RowGroups = rowGroups;
    }

    /// <summary>
    /// Walks <paramref name="pathInSchema"/> down from <paramref name="root"/> one child per segment.
    /// Best effort: any failure along the way yields null rather than an exception.
    /// </summary>
    private static ParquetSchemaElement? TryFindField(ParquetSchemaElement root, IEnumerable<string> pathInSchema)
    {
        try
        {
            var node = root;
            foreach (var segment in pathInSchema)
            {
                node = node.GetChild(segment);
            }
            return node;
        }
        catch
        {
            /*swallow*/
            return null;
        }
    }
}
|
||||
|
||||
/// <summary>
/// Immutable holder for the metadata of one row group.
/// </summary>
public class RowGroupMetadata(int ordinal, int rowCount, int columnCount, ICollection<ISortingColumnMetadata>? sortingColumnMetadata,
    ICollection<IRowGroupColumnMetadata>? columns, long fileOffset, long totalByteSize, long totalCompressedSize) : IRowGroupMetadata
{
    public int Ordinal { get; } = ordinal;

    public int RowCount { get; } = rowCount;

    public int ColumnCount { get; } = columnCount;

    public ICollection<ISortingColumnMetadata>? SortingColumns { get; } = sortingColumnMetadata;

    public ICollection<IRowGroupColumnMetadata>? Columns { get; } = columns;

    public long FileOffset { get; } = fileOffset;

    public long TotalByteSize { get; } = totalByteSize;

    public long TotalCompressedSize { get; } = totalCompressedSize;
}
|
||||
|
||||
/// <summary>
/// Immutable holder for one sorting column entry of a row group.
/// </summary>
public class SortingColumnMetadata(int columnIdx, bool descending, bool nullsFirst) : ISortingColumnMetadata
{
    public int ColumnIdx { get; } = columnIdx;

    public bool Descending { get; } = descending;

    public bool NullsFirst { get; } = nullsFirst;
}
|
||||
|
||||
/// <summary>
/// Immutable holder for the metadata of one column within a row group.
/// Every value is stored exactly as it was passed to the constructor.
/// </summary>
public class RowGroupColumnMetadata(
    int? columnId,
    string? pathInSchema,
    string? type,
    int? numValues,
    long? totalUncompressedSize,
    long? totalCompressedSize,
    long? dataPageOffset,
    long? indexPageOffset,
    long? dictionaryPageOffset,
    RowGroupColumnStatistics? statistics,
    long? bloomFilterOffset,
    long? bloomFilterLength) : IRowGroupColumnMetadata
{
    public int? ColumnId { get; } = columnId;

    public string? PathInSchema { get; } = pathInSchema;

    public string? Type { get; } = type;

    public int? NumValues { get; } = numValues;

    public long? TotalUncompressedSize { get; } = totalUncompressedSize;

    public long? TotalCompressedSize { get; } = totalCompressedSize;

    public long? DataPageOffset { get; } = dataPageOffset;

    public long? IndexPageOffset { get; } = indexPageOffset;

    public long? DictionaryPageOffset { get; } = dictionaryPageOffset;

    public IRowGroupColumnStatistics? Statistics { get; } = statistics;

    public long? BloomFilterOffset { get; } = bloomFilterOffset;

    public long? BloomFilterLength { get; } = bloomFilterLength;
}
|
||||
|
||||
/// <summary>
/// Column statistics of a row group. When a schema field is supplied, the raw statistic
/// bytes are decoded into a CLR value where the field's CLR type is one of the handled types.
/// </summary>
public class RowGroupColumnStatistics : IRowGroupColumnStatistics
{
    /// <summary>
    /// Decoders for CLR types whose value can be produced from the raw bytes alone.
    /// <see cref="DateTime"/> is handled separately because it also needs the field's time unit.
    /// </summary>
    private static readonly Dictionary<System.Type, Func<byte[], object>> s_decoders = new()
    {
        [typeof(string)] = bytes => System.Text.Encoding.UTF8.GetString(bytes),
        [typeof(byte)] = bytes => BitConverter.ToUInt32(bytes, 0),
        [typeof(sbyte)] = bytes => BitConverter.ToInt32(bytes, 0),
        [typeof(short)] = bytes => BitConverter.ToInt16(bytes, 0),
        [typeof(ushort)] = bytes => BitConverter.ToUInt16(bytes, 0),
        [typeof(int)] = bytes => BitConverter.ToInt32(bytes, 0),
        [typeof(uint)] = bytes => BitConverter.ToUInt32(bytes, 0),
        [typeof(long)] = bytes => BitConverter.ToInt64(bytes, 0),
        [typeof(ulong)] = bytes => BitConverter.ToUInt64(bytes, 0),
        [typeof(float)] = bytes => BitConverter.ToSingle(bytes, 0),
        [typeof(double)] = bytes => BitConverter.ToDouble(bytes, 0),
        [typeof(bool)] = bytes => BitConverter.ToBoolean(bytes, 0),
        [typeof(DateOnly)] = bytes => DateOnly.FromDateTime(DateTime.UnixEpoch)
            .AddDays(BitConverter.ToInt32(bytes, 0)),
        [typeof(Guid)] = bytes => new Guid(bytes),
    };

    public object? Min { get; }
    public object? Max { get; }
    public long? NullCount { get; }
    public long? DistinctCount { get; }
    public object? MinValue { get; }
    public object? MaxValue { get; }
    public bool? IsMinValueExact { get; }
    public bool? IsMaxValueExact { get; }

    /// <summary>
    /// Stores the statistics, decoding the min/max values with <paramref name="field"/> when it is given.
    /// </summary>
    /// <param name="min">Deprecated minimum; dropped when it repeats <paramref name="minValue"/>.</param>
    /// <param name="max">Deprecated maximum; dropped when it repeats <paramref name="maxValue"/>.</param>
    /// <param name="nullCount">Stored as-is.</param>
    /// <param name="distinctCount">Stored as-is.</param>
    /// <param name="minValue">Minimum value.</param>
    /// <param name="maxValue">Maximum value.</param>
    /// <param name="isMinValueExact">Stored as-is.</param>
    /// <param name="isMaxValueExact">Stored as-is.</param>
    /// <param name="field">Schema field used for decoding; when null the values are stored undecoded.</param>
    public RowGroupColumnStatistics(object? min, object? max, long? nullCount, long? distinctCount,
        object? minValue, object? maxValue, bool? isMinValueExact, bool? isMaxValueExact, ParquetSchemaElement? field)
    {
        if (RepeatsNewValue(min, minValue))
        {
            min = null; //don't show the same data twice in the deprecated field
        }

        if (RepeatsNewValue(max, maxValue))
        {
            max = null; //don't show the same data twice in the deprecated field
        }

        Min = DecodeIfPossible(min, field);
        Max = DecodeIfPossible(max, field);
        NullCount = nullCount;
        DistinctCount = distinctCount;
        MinValue = DecodeIfPossible(minValue, field);
        MaxValue = DecodeIfPossible(maxValue, field);
        IsMinValueExact = isMinValueExact;
        IsMaxValueExact = isMaxValueExact;
    }

    /// <summary>True when both values are present and the byte-array comparison helper returns 0 for them.</summary>
    private static bool RepeatsNewValue(object? deprecatedValue, object? newValue)
        => deprecatedValue is not null
            && newValue is not null
            && Engine.Helpers.ByteArraysEqual(deprecatedValue as byte[], newValue as byte[]) == 0;

    /// <summary>Decodes <paramref name="raw"/> with <paramref name="field"/> when a field is available; otherwise returns it untouched.</summary>
    private object? DecodeIfPossible(object? raw, ParquetSchemaElement? field)
    {
        if (field is null)
        {
            return raw;
        }

        return TryDeserializeValue(raw as byte[], field);
    }

    /// <summary>
    /// Converts raw statistic bytes into a value of the field's CLR type.
    /// Returns the bytes unchanged when they are null or empty, when the type is not handled,
    /// or when anything throws during conversion.
    /// </summary>
    private object? TryDeserializeValue(byte[]? value, ParquetSchemaElement field)
    {
        try
        {
            if (value == null || value.Length == 0)
            {
                return value;
            }

            var clrType = field.ClrType;

            if (clrType == typeof(DateTime))
            {
                var rawTimestamp = BitConverter.ToInt64(value, 0);
                var unit = field.SchemaElement.LogicalType?.TIMESTAMP?.Unit;

                if (unit?.MILLIS is not null)
                {
                    return DateTime.UnixEpoch.AddMilliseconds(rawTimestamp);
                }

                if (unit?.MICROS is not null)
                {
                    return DateTime.UnixEpoch.AddMicroseconds(rawTimestamp);
                }

                if (unit?.NANOS is not null)
                {
                    return DateTime.UnixEpoch.AddMicroseconds(rawTimestamp / 1000);
                }

                // No known time unit: show the raw number.
                return rawTimestamp;
            }

            if (s_decoders.TryGetValue(clrType, out var decode))
            {
                return decode(value);
            }

            //give up
            return value;
        }
        catch
        {
            return value;
        }
    }
}
|
||||
}
|
||||
|
|
@ -1,10 +1,13 @@
|
|||
using Parquet.Meta;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.ParquetNET.Types;
|
||||
using System.Text.Json;
|
||||
using System.Text.Json.Serialization.Metadata;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public class ParquetSchemaElement
|
||||
public class ParquetSchemaElement : IParquetSchemaElement
|
||||
{
|
||||
public string Path => SchemaElement.Name;
|
||||
public string PathWithParent => string.Concat(this.Parent?.Parent is not null /*exclude root node*/ ? (this.Parent.Path + "/") : string.Empty, Path);
|
||||
|
|
@ -40,9 +43,9 @@ namespace ParquetViewer.Engine
|
|||
{
|
||||
if (this.DataField is not null)
|
||||
return FieldTypeId.Primitive;
|
||||
else if (this.SchemaElement.LogicalType?.LIST is not null || this.SchemaElement.ConvertedType == ConvertedType.LIST)
|
||||
else if (this.SchemaElement.LogicalType?.LIST is not null || this.SchemaElement.ConvertedType == Parquet.Meta.ConvertedType.LIST)
|
||||
return FieldTypeId.List;
|
||||
else if (this.SchemaElement.LogicalType?.MAP is not null || this.SchemaElement.ConvertedType == ConvertedType.MAP)
|
||||
else if (this.SchemaElement.LogicalType?.MAP is not null || this.SchemaElement.ConvertedType == Parquet.Meta.ConvertedType.MAP)
|
||||
return FieldTypeId.Map;
|
||||
else if (this.SchemaElement.NumChildren > 0) //Struct
|
||||
return FieldTypeId.Struct;
|
||||
|
|
@ -168,27 +171,175 @@ namespace ParquetViewer.Engine
|
|||
return field;
|
||||
}
|
||||
public bool BelongsToListField => this._systemFieldType == SystemFieldTypeId.ListItemNode;
|
||||
public bool BelongsToListOfStructsField =>
|
||||
public bool BelongsToListOfStructsField =>
|
||||
this.Parent?._systemFieldType == SystemFieldTypeId.ListItemNode && this.Parent?.FieldType == FieldTypeId.Struct;
|
||||
public int NumberOfListParents => _parentsExcludingRoot.Count(field => field.SchemaElement.RepetitionType == FieldRepetitionType.REPEATED);
|
||||
public int NumberOfListParents => _parentsExcludingRoot.Count(@field => @field.SchemaElement.RepetitionType == FieldRepetitionType.REPEATED);
|
||||
|
||||
public int CurrentDefinitionLevel => _parentsExcludingRoot.Append(this)
|
||||
.Count(
|
||||
field => field.SchemaElement.RepetitionType == FieldRepetitionType.OPTIONAL
|
||||
|| (field._systemFieldType == SystemFieldTypeId.ListNode && field.Parent?._systemFieldType == SystemFieldTypeId.ListItemNode) //Fixes list-of-lists tests
|
||||
@field => @field.SchemaElement.RepetitionType == FieldRepetitionType.OPTIONAL
|
||||
|| (@field._systemFieldType == SystemFieldTypeId.ListNode && @field.Parent?._systemFieldType == SystemFieldTypeId.ListItemNode) //Fixes list-of-lists tests
|
||||
);
|
||||
|
||||
public bool IsPrimitive => FieldType == FieldTypeId.Primitive;
|
||||
|
||||
ICollection<IParquetSchemaElement> IParquetSchemaElement.Children => this.Children.ToList<IParquetSchemaElement>();
|
||||
|
||||
public System.Type ClrType => this.DataField?.ClrType ?? this.FieldType switch
|
||||
{
|
||||
FieldTypeId.List => typeof(ListValue),
|
||||
FieldTypeId.Map => typeof(MapValue),
|
||||
FieldTypeId.Struct => typeof(StructValueExt),
|
||||
_ => throw new InvalidOperationException("Cannot determine CLR type for primitive field without ClrType information."),
|
||||
};
|
||||
|
||||
public object? LogicalType => LogicalTypeToJSONObject(this.SchemaElement.LogicalType);
|
||||
|
||||
/// <summary>
/// Projects a thrift logical type onto a small anonymous object that carries the type's
/// name plus, for some types, its parameters.
/// </summary>
/// <param name="logicalType">Logical type to describe; may be null.</param>
/// <returns>
/// Null when <paramref name="logicalType"/> is null; otherwise an object with a <c>Name</c>
/// property and, depending on the type, <c>Scale</c>/<c>Precision</c>,
/// <c>IsAdjustedToUTC</c>/<c>Unit</c> or <c>BitWidth</c>/<c>IsSigned</c>.
/// </returns>
private static object? LogicalTypeToJSONObject(LogicalType? logicalType)
{
    if (logicalType is null)
    {
        return null;
    }

    if (logicalType.STRING is not null)
    {
        return new { Name = nameof(logicalType.STRING) };
    }

    if (logicalType.MAP is not null)
    {
        return new { Name = nameof(logicalType.MAP) };
    }

    if (logicalType.LIST is not null)
    {
        return new { Name = nameof(logicalType.LIST) };
    }

    if (logicalType.ENUM is not null)
    {
        return new { Name = nameof(logicalType.ENUM) };
    }

    if (logicalType.DECIMAL is { } decimalType)
    {
        return new
        {
            Name = nameof(logicalType.DECIMAL),
            decimalType.Scale,
            decimalType.Precision
        };
    }

    if (logicalType.DATE is not null)
    {
        return new { Name = nameof(logicalType.DATE) };
    }

    if (logicalType.TIME is { } timeType)
    {
        return new
        {
            Name = nameof(logicalType.TIME),
            timeType.IsAdjustedToUTC,
            Unit = TimeUnitToString(timeType.Unit)
        };
    }

    if (logicalType.TIMESTAMP is { } timestampType)
    {
        return new
        {
            Name = nameof(logicalType.TIMESTAMP),
            timestampType.IsAdjustedToUTC,
            Unit = TimeUnitToString(timestampType.Unit)
        };
    }

    if (logicalType.INTEGER is { } integerType)
    {
        return new
        {
            Name = nameof(logicalType.INTEGER),
            integerType.BitWidth,
            integerType.IsSigned
        };
    }

    if (logicalType.JSON is not null)
    {
        return new { Name = nameof(logicalType.JSON) };
    }

    if (logicalType.BSON is not null)
    {
        return new { Name = nameof(logicalType.BSON) };
    }

    if (logicalType.UUID is not null)
    {
        return new { Name = nameof(logicalType.UUID) };
    }

    if (logicalType.UNKNOWN is { } unknownType)
    {
        // Reports the runtime class name of the UNKNOWN member, not the literal "UNKNOWN".
        return new { Name = $"{unknownType.GetType().Name}" };
    }

    // None of the known members is set.
    return new { Name = nameof(logicalType.UNKNOWN) };
}
|
||||
|
||||
/// <summary>
/// Returns the name of the unit that is set on <paramref name="timeUnit"/>.
/// </summary>
/// <param name="timeUnit">Time unit to inspect; may be null.</param>
/// <returns>
/// "MILLIS", "MICROS" or "NANOS" for the first member that is set (checked in that order),
/// or an empty string when <paramref name="timeUnit"/> is null or none is set.
/// </returns>
static string TimeUnitToString(TimeUnit? timeUnit)
{
    if (timeUnit?.MILLIS is not null)
    {
        return nameof(timeUnit.MILLIS);
    }

    if (timeUnit?.MICROS is not null)
    {
        return nameof(timeUnit.MICROS);
    }

    if (timeUnit?.NANOS is not null)
    {
        return nameof(timeUnit.NANOS);
    }

    return string.Empty;
}
|
||||
|
||||
public RepetitionTypeId? RepetitionType => this.SchemaElement.RepetitionType switch
|
||||
{
|
||||
FieldRepetitionType.REQUIRED => RepetitionTypeId.Required,
|
||||
FieldRepetitionType.OPTIONAL => RepetitionTypeId.Optional,
|
||||
FieldRepetitionType.REPEATED => RepetitionTypeId.Repeated,
|
||||
_ => null
|
||||
};
|
||||
|
||||
public int? TypeLength => this.SchemaElement.TypeLength;
|
||||
public int? NumChildren => this.SchemaElement.NumChildren;
|
||||
public string? ConvertedType => this.SchemaElement.ConvertedType?.ToString();
|
||||
public int? Scale => this.SchemaElement.Scale;
|
||||
public int? Precision => this.SchemaElement.Precision;
|
||||
object? IParquetSchemaElement.LogicalType => this.LogicalType;
|
||||
public string? Type => this.SchemaElement.Type?.ToString();
|
||||
|
||||
private Exception GetSystemFieldAccessException(SystemFieldTypeId fieldType)
|
||||
=> new InvalidOperationException($"Can't get {fieldType} node from '{this.Parent?._systemFieldType}' " +
|
||||
$"for `{this.Parent?.Path + '/' + this.Path}` with types '{this.Parent?.FieldType.ToString() + '/' + this.FieldType.ToString()}'");
|
||||
|
||||
public enum FieldTypeId
|
||||
{
|
||||
Primitive,
|
||||
List,
|
||||
Struct,
|
||||
Map
|
||||
}
|
||||
IParquetSchemaElement IParquetSchemaElement.GetChildCI(string name)
|
||||
=> GetChildCI(name);
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetChild(string name)
|
||||
=> GetChild(name);
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetListField()
|
||||
=> GetListField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetListItemField()
|
||||
=> GetListItemField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetSingleOrByName(string name)
|
||||
=> GetSingleOrByName(name);
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetMapKeyValueField()
|
||||
=> GetMapKeyValueField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetMapKeyField()
|
||||
=> GetMapKeyField();
|
||||
|
||||
IParquetSchemaElement IParquetSchemaElement.GetMapValueField()
|
||||
=> GetMapValueField();
|
||||
|
||||
private enum SystemFieldTypeId
|
||||
{
|
||||
|
|
@ -200,4 +351,4 @@ namespace ParquetViewer.Engine
|
|||
MapValueNode
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<PlatformTarget>x64</PlatformTarget>
|
||||
<Configurations>Debug;Release;Release_SelfContained</Configurations>
|
||||
<ProduceReferenceAssembly>False</ProduceReferenceAssembly>
|
||||
<EnforceCodeStyleInBuild>True</EnforceCodeStyleInBuild>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_SelfContained|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Parquet.Net" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ParquetViewer.Engine\ParquetViewer.Engine.csproj" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
namespace ParquetViewer.Engine
|
||||
namespace ParquetViewer.Engine.ParquetNET
|
||||
{
|
||||
public class SimpleProgress : IProgress<int>
|
||||
{
|
||||
|
|
@ -11,4 +11,4 @@
|
|||
ProgressChanged?.Invoke(_progress);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
14
src/ParquetViewer.Engine.ParquetNET/Types/StructValue.cs
Normal file
14
src/ParquetViewer.Engine.ParquetNET/Types/StructValue.cs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
using ParquetViewer.Engine.Types;
|
||||
|
||||
namespace ParquetViewer.Engine.ParquetNET.Types
|
||||
{
|
||||
/// <summary>
/// A <see cref="StructValue"/> that additionally carries an internal <see cref="IsList"/> flag.
/// </summary>
public class StructValueExt : StructValue
{
    /// <summary>Flag set by code inside this assembly; false unless assigned.</summary>
    internal bool IsList { get; set; }

    /// <summary>Creates the value over <paramref name="data"/> by delegating to the base constructor.</summary>
    internal StructValueExt(DataRowLite data)
        : base(data)
    {
    }
}
|
||||
}
|
||||
|
|
@ -4,9 +4,9 @@ using static ParquetViewer.Engine.DataTableLite;
|
|||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
internal class DataTableLite
|
||||
public class DataTableLite
|
||||
{
|
||||
internal record ColumnLite(string Name, Type Type, ParquetSchemaElement ParentSchema, int Ordinal);
|
||||
public record ColumnLite(string Name, Type Type, IParquetSchemaElement ParentSchema, int Ordinal);
|
||||
|
||||
private int _ordinal = 0;
|
||||
private readonly Dictionary<string, ColumnLite> _columns = new();
|
||||
|
|
@ -30,10 +30,12 @@ namespace ParquetViewer.Engine
|
|||
|
||||
public DataTableLite(int expectedRowCount = 1000)
|
||||
{
|
||||
ArgumentOutOfRangeException.ThrowIfLessThan(expectedRowCount, 0);
|
||||
|
||||
this._rows = new(expectedRowCount);
|
||||
}
|
||||
|
||||
public ColumnLite AddColumn(string name, Type type, ParquetSchemaElement parent)
|
||||
public ColumnLite AddColumn(string name, Type type, IParquetSchemaElement parent)
|
||||
{
|
||||
if (_rows.Count > 0)
|
||||
{
|
||||
|
|
@ -132,12 +134,12 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
|
||||
internal class DataRowLite
|
||||
public class DataRowLite : IDataRowLite
|
||||
{
|
||||
public IReadOnlyCollection<string> ColumnNames => Columns.Keys;
|
||||
public Dictionary<string, ColumnLite> Columns { get; }
|
||||
public object[] Row { get; }
|
||||
public DataTableLite Table { get; }
|
||||
|
||||
public DataRowLite(object[] data, IEnumerable<ColumnLite> columns, DataTableLite table)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(data);
|
||||
|
|
@ -152,20 +154,6 @@ namespace ParquetViewer.Engine
|
|||
throw new ArgumentException($"Data length {data.Length} doesn't match number of columns {columns.Count()}", nameof(data));
|
||||
}
|
||||
}
|
||||
|
||||
public DataTable ToDataTable()
|
||||
{
|
||||
var dt = new DataTable();
|
||||
foreach (var column in this.Columns)
|
||||
{
|
||||
dt.Columns.Add(new DataColumn(column.Key, column.Value.Type));
|
||||
}
|
||||
var row = dt.NewRow();
|
||||
row.ItemArray = this.Row;
|
||||
dt.Rows.Add(row);
|
||||
return dt;
|
||||
}
|
||||
|
||||
public object GetValue(string columnName)
|
||||
{
|
||||
if (!this.Columns.ContainsKey(columnName))
|
||||
|
|
@ -185,4 +173,11 @@ namespace ParquetViewer.Engine
|
|||
throw new IndexOutOfRangeException($"Could not get value for column `{columnName}`");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Read-only view of a single row: its column names, its raw cell values, and lookup by column name.
/// </summary>
public interface IDataRowLite
{
    /// <summary>Names of the row's columns.</summary>
    IReadOnlyCollection<string> ColumnNames { get; }

    /// <summary>The row's cell values.</summary>
    object[] Row { get; }

    /// <summary>Returns the value stored for <paramref name="columnName"/>.</summary>
    object GetValue(string columnName);
}
|
||||
}
|
||||
17
src/ParquetViewer.Engine/Enums.cs
Normal file
17
src/ParquetViewer.Engine/Enums.cs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
namespace ParquetViewer.Engine
|
||||
{
|
||||
/// <summary>
/// Repetition of a schema field.
/// </summary>
public enum RepetitionTypeId
{
    /// <summary>Required field.</summary>
    Required,

    /// <summary>Optional field.</summary>
    Optional,

    /// <summary>Repeated field.</summary>
    Repeated
}
|
||||
|
||||
/// <summary>
/// Kind of a schema field.
/// </summary>
public enum FieldTypeId
{
    /// <summary>Primitive field.</summary>
    Primitive,

    /// <summary>List field.</summary>
    List,

    /// <summary>Struct field.</summary>
    Struct,

    /// <summary>Map field.</summary>
    Map
}
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
public List<SkippedFile> SkippedFiles { get; private set; }
|
||||
|
||||
internal AllFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Could not open any files in directory.")
|
||||
public AllFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Could not open any files in directory.")
|
||||
{
|
||||
SkippedFiles = new List<SkippedFile>();
|
||||
if (skippedFiles is not null)
|
||||
|
|
@ -23,4 +23,4 @@
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -6,9 +6,10 @@
|
|||
public const int MAX_DECIMAL_PRECISION = 29;
|
||||
public const int MAX_DECIMAL_SCALE = 28;
|
||||
|
||||
public string FieldName { get; }
|
||||
public int Precision { get; }
|
||||
public int Scale { get; }
|
||||
public bool HasDetailedInfo => FieldName is not null || Precision is not null || Scale is not null;
|
||||
public string? FieldName { get; }
|
||||
public int? Precision { get; }
|
||||
public int? Scale { get; }
|
||||
|
||||
public DecimalOverflowException(string fieldName, int precision, int scale, OverflowException overflowEx) : base(overflowEx.Message, overflowEx)
|
||||
{
|
||||
|
|
@ -16,5 +17,10 @@
|
|||
this.Precision = precision;
|
||||
this.Scale = scale;
|
||||
}
|
||||
|
||||
public DecimalOverflowException(OverflowException overflowEx) : base(overflowEx.Message, overflowEx)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,4 +7,4 @@
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
{
|
||||
public MalformedFieldException(string message, Exception? ex = null) : base(message, ex) { }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -2,11 +2,11 @@
|
|||
{
|
||||
public class MultipleSchemasFoundException : Exception
|
||||
{
|
||||
public List<Parquet.Schema.ParquetSchema> Schemas;
|
||||
public List<List<string>> Schemas;
|
||||
|
||||
internal MultipleSchemasFoundException(List<Parquet.Schema.ParquetSchema> parquetSchemas) : base("Multiple schemas found in directory.")
|
||||
public MultipleSchemasFoundException(List<List<string>> parquetSchemas) : base("Multiple schemas found in directory.")
|
||||
{
|
||||
Schemas = parquetSchemas ?? new List<Parquet.Schema.ParquetSchema>();
|
||||
Schemas = parquetSchemas ?? new List<List<string>>();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,10 +1,10 @@
|
|||
namespace ParquetViewer.Engine.Exceptions
|
||||
{
|
||||
internal class ParquetEngineException : Exception
|
||||
public class ParquetEngineException : Exception
|
||||
{
|
||||
public ParquetEngineException(string? message = null, Exception? exception = null) : base(message, exception)
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
public List<SkippedFile> SkippedFiles { get; private set; }
|
||||
|
||||
internal SomeFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Some files could not be opened.")
|
||||
public SomeFilesSkippedException(IEnumerable<KeyValuePair<string, Exception>> skippedFiles) : base("Some files could not be opened.")
|
||||
{
|
||||
SkippedFiles = new List<SkippedFile>();
|
||||
|
||||
|
|
@ -20,4 +20,4 @@
|
|||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,4 +7,4 @@
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -7,4 +7,4 @@
|
|||
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +1,51 @@
|
|||
using System.Numerics;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Numerics;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
internal static class Helpers
|
||||
public static class Helpers
|
||||
{
|
||||
/// <summary>
/// Walks two sequences in lock-step and yields one pair per index until BOTH are exhausted.
/// When one sequence is shorter, its slot is filled with <paramref name="missingIndexValue"/>.
/// </summary>
/// <param name="enumerable1">Sequence supplying the first item of every pair.</param>
/// <param name="enumerable2">Sequence supplying the second item of every pair.</param>
/// <param name="missingIndexValue">Filler used for the shorter sequence once it has run out.</param>
/// <returns>A lazily evaluated sequence of pairs.</returns>
/// <exception cref="ArgumentNullException">Either sequence is null (thrown at call time, not on first enumeration).</exception>
public static IEnumerable<(object?, object?)> PairEnumerables(IEnumerable<object?> enumerable1, IEnumerable<object?> enumerable2, object? missingIndexValue = null)
{
    // Validate here rather than inside the iterator: iterator bodies do not run until the
    // first MoveNext, which would postpone the exception to an unrelated call site.
    ArgumentNullException.ThrowIfNull(enumerable1);
    ArgumentNullException.ThrowIfNull(enumerable2);

    return Iterate(enumerable1, enumerable2, missingIndexValue);

    static IEnumerable<(object?, object?)> Iterate(IEnumerable<object?> first, IEnumerable<object?> second, object? filler)
    {
        // 'using' guarantees both enumerators are disposed even if the consumer stops early.
        using var enumerator1 = first.GetEnumerator();
        using var enumerator2 = second.GetEnumerator();

        var hasMore1 = enumerator1.MoveNext();
        var hasMore2 = enumerator2.MoveNext();
        while (hasMore1 || hasMore2)
        {
            yield return (hasMore1 ? enumerator1.Current : filler, hasMore2 ? enumerator2.Current : filler);

            // Never advance an enumerator that has already reported the end of its sequence.
            hasMore1 = hasMore1 && enumerator1.MoveNext();
            hasMore2 = hasMore2 && enumerator2.MoveNext();
        }
    }
}
|
||||
|
||||
/// <summary>
/// Walks two sequences in lock-step and yields one strongly typed pair per index.
/// Unlike the padded overload, both sequences must have the same length.
/// </summary>
/// <param name="enumerable1">Sequence supplying the first item of every pair.</param>
/// <param name="enumerable2">Sequence supplying the second item of every pair.</param>
/// <returns>A lazily evaluated sequence of pairs.</returns>
/// <exception cref="ArgumentNullException">Either sequence is null (thrown at call time, not on first enumeration).</exception>
/// <exception cref="InvalidDataException">Thrown during enumeration once one sequence ends before the other.</exception>
public static IEnumerable<(T, R)> PairEnumerables<T, R>(IEnumerable<T> enumerable1, IEnumerable<R> enumerable2)
{
    // Validate here rather than inside the iterator so the failure surfaces at the call site.
    ArgumentNullException.ThrowIfNull(enumerable1);
    ArgumentNullException.ThrowIfNull(enumerable2);

    return Iterate(enumerable1, enumerable2);

    static IEnumerable<(T, R)> Iterate(IEnumerable<T> first, IEnumerable<R> second)
    {
        // 'using' guarantees both enumerators are disposed even if the consumer stops early
        // or the length mismatch below throws.
        using var enumerator1 = first.GetEnumerator();
        using var enumerator2 = second.GetEnumerator();

        var hasMore1 = enumerator1.MoveNext();
        var hasMore2 = enumerator2.MoveNext();
        while (hasMore1 && hasMore2)
        {
            yield return (enumerator1.Current, enumerator2.Current);
            hasMore1 = enumerator1.MoveNext();
            hasMore2 = enumerator2.MoveNext();
        }

        // Exactly one side still has items: the inputs were not the same length.
        if (hasMore1 || hasMore2)
        {
            throw new InvalidDataException("Enumerables are of different lengths.");
        }
    }
}
|
||||
|
||||
public static int CompareTo(object? value, object? otherValue)
|
||||
{
|
||||
value ??= DBNull.Value;
|
||||
|
|
@ -29,76 +71,96 @@ namespace ParquetViewer.Engine
|
|||
}
|
||||
}
|
||||
|
||||
#region Dubious Functions
|
||||
//This logic is a cluster f... right now. It blends https://www.aloneguid.uk/posts/2023/04/parquet-empty-vs-null
|
||||
//with some of my understanding of how the dremel algorithm works. No way will it work for all cases.
|
||||
|
||||
public static bool IsNull(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
|
||||
=> dataColumn.DefinitionLevels?.Length > index && dataColumn.DefinitionLevels[index] <= field.CurrentDefinitionLevel - 1;
|
||||
|
||||
public static bool IsEmpty(this Parquet.Data.DataColumn dataColumn, int index, ParquetSchemaElement field)
|
||||
=> dataColumn.DefinitionLevels?.Length > index && dataColumn.DefinitionLevels[index] == field.CurrentDefinitionLevel
|
||||
&& field.DataField?.MaxDefinitionLevel != dataColumn.DefinitionLevels[index] /*Fixes STRUCT_TYPE_TEST*/;
|
||||
#endregion
|
||||
|
||||
/// <summary>
|
||||
/// Some parquet writers don't write null entries into the data array for empty and null lists.
|
||||
/// This throws off our logic so lets find all empty/null lists and add a null entry into
|
||||
/// the data array to align it with the repetition/definition levels.
|
||||
/// </summary>
|
||||
/// <param name="dataColumn">The parquet data column</param>
|
||||
public static IEnumerable<object> GetDataWithPaddedNulls(this Parquet.Data.DataColumn dataColumn, ParquetSchemaElement field)
|
||||
public static void WriteValue(Utf8JsonWriterWithRunningLength jsonWriter, object value, bool truncateForDisplay)
|
||||
{
|
||||
var dataEnumerable = dataColumn.Data.Cast<object?>().Select(d => d ?? DBNull.Value);
|
||||
|
||||
int levelCount = dataColumn.DefinitionLevels?.Length ?? 0;
|
||||
if (levelCount > dataColumn.Data.Length)
|
||||
if (value is null)
|
||||
{
|
||||
dataEnumerable = GetDataWithPaddedNulls();
|
||||
|
||||
IEnumerable<object> GetDataWithPaddedNulls()
|
||||
{
|
||||
var index = -1;
|
||||
foreach (var data in dataColumn.Data)
|
||||
{
|
||||
index++;
|
||||
|
||||
while (dataColumn.IsEmpty(index, field) || dataColumn.IsNull(index, field))
|
||||
{
|
||||
yield return DBNull.Value;
|
||||
index++;
|
||||
}
|
||||
|
||||
yield return data ?? DBNull.Value;
|
||||
}
|
||||
|
||||
//Need to handle case where last N rows are null/empty
|
||||
while (levelCount > index + 1)
|
||||
{
|
||||
yield return DBNull.Value;
|
||||
index++;
|
||||
}
|
||||
}
|
||||
//Value should never be null as we should be replacing all those with DBNull.Value
|
||||
throw new ArgumentNullException(nameof(value));
|
||||
}
|
||||
|
||||
return dataEnumerable;
|
||||
}
|
||||
|
||||
public static IEnumerable<(object?, object?)> PairEnumerables(IEnumerable<object?> enumerable1, IEnumerable<object?> enumerable2, object? missingIndexValue = null)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(enumerable1);
|
||||
ArgumentNullException.ThrowIfNull(enumerable2);
|
||||
|
||||
var enumerator1 = enumerable1.GetEnumerator();
|
||||
var enumerator2 = enumerable2.GetEnumerator();
|
||||
|
||||
var hasMore1 = enumerator1.MoveNext();
|
||||
var hasMore2 = enumerator2.MoveNext();
|
||||
while (hasMore1 || hasMore2)
|
||||
else if (value == DBNull.Value)
|
||||
{
|
||||
yield return (hasMore1 ? enumerator1.Current : missingIndexValue, hasMore2 ? enumerator2.Current : missingIndexValue);
|
||||
hasMore1 = enumerator1.MoveNext();
|
||||
hasMore2 = enumerator2.MoveNext();
|
||||
jsonWriter.WriteNullValue();
|
||||
}
|
||||
else if (value is string str)
|
||||
{
|
||||
jsonWriter.WriteStringValue(str);
|
||||
}
|
||||
else if (value is bool @bool)
|
||||
{
|
||||
jsonWriter.WriteBooleanValue(@bool);
|
||||
}
|
||||
else if (value.GetType().IsNumber())
|
||||
{
|
||||
jsonWriter.WriteNumberValue(Convert.ToDecimal(value));
|
||||
}
|
||||
else if (value is IStructValue @struct)
|
||||
{
|
||||
var json = @struct.ToJSON(out var success);
|
||||
if (success)
|
||||
jsonWriter.WriteRawValue(json);
|
||||
else
|
||||
jsonWriter.WriteStringValue(json);
|
||||
}
|
||||
else if (value is IMapValue map)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach ((object mapKey, object mapValue) in map)
|
||||
{
|
||||
jsonWriter.WriteStartObject();
|
||||
jsonWriter.WritePropertyName("key");
|
||||
WriteValue(jsonWriter, mapKey, truncateForDisplay);
|
||||
jsonWriter.WritePropertyName("value");
|
||||
WriteValue(jsonWriter, mapValue, truncateForDisplay);
|
||||
jsonWriter.WriteEndObject();
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is IListValue list)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach (var item in list)
|
||||
{
|
||||
WriteValue(jsonWriter, item, truncateForDisplay);
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is IByteArrayValue byteArray /*&& truncateForDisplay //should we use the entire byte array if
|
||||
* we're not truncating for display? Seems kind of unreasonable
|
||||
* for users to rely on binary data within a Struct value preview.*/)
|
||||
{
|
||||
const int byteArrayMaxStringLength = 24; //arbitrary number that I think looks good
|
||||
var byteArrayAsString = byteArray.ToStringTruncated(byteArrayMaxStringLength);
|
||||
jsonWriter.WriteStringValue(byteArrayAsString);
|
||||
}
|
||||
else if (value is DateTime dt)
|
||||
{
|
||||
//Write dates as string
|
||||
if (ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
jsonWriter.WriteStringValue(dt.ToString(ParquetEngineSettings.DateDisplayFormat));
|
||||
else
|
||||
jsonWriter.WriteStringValue(dt.ToString());
|
||||
}
|
||||
else if (value is DateOnly dateOnly)
|
||||
{
|
||||
//Write dates as string
|
||||
if (ParquetEngineSettings.DateOnlyDisplayFormat is not null)
|
||||
jsonWriter.WriteStringValue(dateOnly.ToString(ParquetEngineSettings.DateOnlyDisplayFormat));
|
||||
else
|
||||
jsonWriter.WriteStringValue(dateOnly.ToString());
|
||||
}
|
||||
else if (value is TimeOnly timeOnly)
|
||||
{
|
||||
//Write time as string
|
||||
if (ParquetEngineSettings.TimeOnlyDisplayFormat is not null)
|
||||
jsonWriter.WriteStringValue(timeOnly.ToString(ParquetEngineSettings.TimeOnlyDisplayFormat));
|
||||
else
|
||||
jsonWriter.WriteStringValue(timeOnly.ToString());
|
||||
}
|
||||
else
|
||||
{
|
||||
//Everything else just try to write it as string
|
||||
jsonWriter.WriteStringValue(value.ToString()!);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -107,5 +169,42 @@ namespace ParquetViewer.Engine
|
|||
/// </summary>
|
||||
public static bool IsNumber(this Type type)
{
    // A type qualifies when any interface it implements is a constructed form of INumber<T>.
    foreach (var implemented in type.GetInterfaces())
    {
        if (implemented.IsGenericType && implemented.GetGenericTypeDefinition() == typeof(INumber<>))
        {
            return true;
        }
    }

    return false;
}
|
||||
|
||||
/// <summary>
/// Lists candidate parquet files under <paramref name="folderPath"/>, including sub-folders.
/// Files ending in <c>.parquet</c>, <c>.parquet.gzip</c> or <c>.parquet.gz</c> are returned;
/// if none exist, every file is returned so extensionless parquet files can still be tried.
/// </summary>
/// <param name="folderPath">Root folder to search.</param>
/// <returns>The matching file paths, sorted by path.</returns>
public static IEnumerable<string> ListParquetFiles(string folderPath)
{
    // Walk the directory tree exactly once; the previous Any() + OrderBy() walked it twice.
    var allFiles = Directory.EnumerateFiles(folderPath, "*", SearchOption.AllDirectories).ToList();
    var parquetFiles = allFiles.Where(HasParquetExtension).ToList();

    // No recognised extension anywhere: fall back to every file (extensionless parquet files).
    var candidates = parquetFiles.Count > 0 ? parquetFiles : allFiles;
    return candidates.OrderBy(filename => filename);

    // Ordinal, case-insensitive match: file extensions are not linguistic text, and
    // "DATA.PARQUET" is as valid as "data.parquet".
    static bool HasParquetExtension(string file) =>
        file.EndsWith(".parquet", StringComparison.OrdinalIgnoreCase) ||
        file.EndsWith(".parquet.gzip", StringComparison.OrdinalIgnoreCase) ||
        file.EndsWith(".parquet.gz", StringComparison.OrdinalIgnoreCase);
}
|
||||
|
||||
/// <summary>
/// Best-effort disposal of every item in <paramref name="disposables"/>.
/// A null sequence and null items are ignored, and an exception thrown by one item's
/// Dispose is swallowed so the remaining items are still disposed.
/// </summary>
/// <param name="disposables">Items to dispose; may be null.</param>
public static void EZDispose(IEnumerable<IDisposable> disposables)
{
    if (disposables is not null)
    {
        foreach (var item in disposables)
        {
            try
            {
                item?.Dispose();
            }
            catch
            {
                // Deliberately ignored: one failing Dispose must not stop the others.
            }
        }
    }
}
|
||||
|
||||
/// <summary>
/// Compares two byte sequences element by element.
/// Despite the name this is an ordering comparison, not an equality test: it returns the
/// result of <see cref="MemoryExtensions.SequenceCompareTo{T}(ReadOnlySpan{T}, ReadOnlySpan{T})"/>.
/// </summary>
/// <returns>Zero when the sequences are equal; otherwise a negative or positive value giving their relative order.</returns>
public static int ByteArraysEqual(ReadOnlySpan<byte> a1, ReadOnlySpan<byte> a2)
{
    return MemoryExtensions.SequenceCompareTo(a1, a2);
}
|
||||
}
|
||||
}
|
||||
}
|
||||
20
src/ParquetViewer.Engine/IParquetEngine.cs
Normal file
20
src/ParquetViewer.Engine/IParquetEngine.cs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
/// <summary>
/// Contract for a disposable parquet engine: exposes field names, record and partition
/// counts, metadata, and asynchronous row reading and parquet writing.
/// </summary>
public interface IParquetEngine : IDisposable
{
    /// <summary>Field names exposed by the engine.</summary>
    List<string> Fields { get; }

    /// <summary>Number of records available.</summary>
    long RecordCount { get; }

    /// <summary>Number of partitions.</summary>
    int NumberOfPartitions { get; }

    /// <summary>Custom key/value metadata.</summary>
    Dictionary<string, string> CustomMetadata { get; }

    /// <summary>Path associated with this engine.</summary>
    string Path { get; }

    /// <summary>Structured parquet metadata.</summary>
    IParquetMetadata Metadata { get; }

    /// <summary>
    /// Asynchronously reads <paramref name="recordCount"/> rows of the selected fields,
    /// starting at <paramref name="offset"/>.
    /// </summary>
    /// <returns>
    /// A task producing a delegate that builds the resulting <see cref="DataTable"/>.
    /// NOTE(review): the meaning of the delegate's bool argument is not visible from this
    /// interface; confirm against the implementations.
    /// </returns>
    Task<Func<bool, DataTable>> ReadRowsAsync(
        List<string> selectedFields,
        int offset,
        int recordCount,
        CancellationToken cancellationToken,
        IProgress<int>? progress = null);

    /// <summary>
    /// Asynchronously writes <paramref name="dataTable"/> to a parquet file at
    /// <paramref name="path"/>, optionally attaching <paramref name="customMetadata"/>.
    /// </summary>
    Task WriteDataToParquetFileAsync(
        DataTable dataTable,
        string path,
        CancellationToken cancellationToken,
        IProgress<int> progress,
        Dictionary<string, string>? customMetadata);
}
|
||||
}
|
||||
59
src/ParquetViewer.Engine/IParquetMetadata.cs
Normal file
59
src/ParquetViewer.Engine/IParquetMetadata.cs
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
namespace ParquetViewer.Engine
|
||||
{
|
||||
/// <summary>Read-only view of a parquet file's metadata.</summary>
public interface IParquetMetadata
{
    /// <summary>Parquet version number.</summary>
    int ParquetVersion { get; }

    /// <summary>Number of row groups.</summary>
    int RowGroupCount { get; }

    /// <summary>Number of rows.</summary>
    int RowCount { get; }

    /// <summary>The "created by" string.</summary>
    string CreatedBy { get; }

    /// <summary>Metadata for each row group.</summary>
    ICollection<IRowGroupMetadata> RowGroups { get; }

    /// <summary>Schema tree element (presumably the root; confirm against implementations).</summary>
    IParquetSchemaElement SchemaTree { get; }
}
|
||||
|
||||
/// <summary>Read-only view of a single row group's metadata.</summary>
public interface IRowGroupMetadata
{
    /// <summary>Ordinal of the row group.</summary>
    int Ordinal { get; }

    /// <summary>Number of rows in the row group.</summary>
    int RowCount { get; }

    /// <summary>Number of columns in the row group.</summary>
    int ColumnCount { get; }

    /// <summary>Sorting columns, or null when none are available.</summary>
    ICollection<ISortingColumnMetadata>? SortingColumns { get; }

    /// <summary>Per-column metadata, or null when none is available.</summary>
    ICollection<IRowGroupColumnMetadata>? Columns { get; }

    /// <summary>File offset of the row group.</summary>
    long FileOffset { get; }

    /// <summary>Total byte size of the row group.</summary>
    long TotalByteSize { get; }

    /// <summary>Total compressed size of the row group.</summary>
    long TotalCompressedSize { get; }
}
|
||||
|
||||
/// <summary>Read-only description of one sorting column of a row group.</summary>
public interface ISortingColumnMetadata
{
    // Interface members are implicitly public, so the modifier is omitted here.

    /// <summary>Index of the column.</summary>
    int ColumnIdx { get; }

    /// <summary>Whether the sort order is descending.</summary>
    bool Descending { get; }

    /// <summary>Whether nulls sort first.</summary>
    bool NullsFirst { get; }
}
|
||||
|
||||
/// <summary>
/// Read-only metadata for one column of a row group. Every value is nullable, so an
/// implementation may leave any of them unset.
/// </summary>
public interface IRowGroupColumnMetadata
{
    // Interface members are implicitly public, so the modifier is omitted here.

    int? ColumnId { get; }

    string? PathInSchema { get; }

    string? Type { get; }

    int? NumValues { get; }

    long? TotalUncompressedSize { get; }

    long? TotalCompressedSize { get; }

    long? DataPageOffset { get; }

    long? IndexPageOffset { get; }

    long? DictionaryPageOffset { get; }

    /// <summary>Column statistics, or null when none are available.</summary>
    IRowGroupColumnStatistics? Statistics { get; }

    long? BloomFilterOffset { get; }

    long? BloomFilterLength { get; }
}
|
||||
|
||||
/// <summary>
/// Read-only statistics for one column of a row group. Every value is nullable, so an
/// implementation may leave any of them unset.
/// </summary>
public interface IRowGroupColumnStatistics
{
    // Interface members are implicitly public, so the modifier is omitted here.
    // NOTE(review): Min/Max and MinValue/MaxValue are both declared; how they differ is
    // not visible from this interface.

    object? Min { get; }

    object? Max { get; }

    long? NullCount { get; }

    long? DistinctCount { get; }

    object? MinValue { get; }

    object? MaxValue { get; }

    bool? IsMinValueExact { get; }

    bool? IsMaxValueExact { get; }
}
|
||||
}
|
||||
69
src/ParquetViewer.Engine/IParquetSchemaElement.cs
Normal file
69
src/ParquetViewer.Engine/IParquetSchemaElement.cs
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
/// <summary>
/// Strongly typed flavour of <see cref="IParquetSchemaElement"/>. Each member is re-declared
/// with <c>new</c>; the child collection and the lookup methods return
/// <typeparamref name="T"/> instead of the non-generic interface.
/// </summary>
/// <typeparam name="T">Concrete schema element type.</typeparam>
public interface IParquetSchemaElement<T> : IParquetSchemaElement
    where T : IParquetSchemaElement
{
    new string Path { get; }

    new ICollection<T> Children { get; }

    new Type ClrType { get; }

    new FieldTypeId FieldType { get; }

    new RepetitionTypeId? RepetitionType { get; }

    new bool IsPrimitive { get; }

    /// <summary>
    /// Case-insensitive counterpart of <see cref="GetChild(string)"/>.
    /// It exists only to cope with non-standard Parquet implementations.
    /// </summary>
    new T GetChildCI(string name);

    new T GetChild(string name);

    new T GetListField();

    new T GetListItemField();

    new T GetSingleOrByName(string name);

    new T GetMapKeyValueField();

    new T GetMapKeyField();

    new T GetMapValueField();
}
|
||||
|
||||
/// <summary>
/// Read-only view of one node in a parquet schema tree, with helpers for looking up
/// child, list and map fields.
/// </summary>
public interface IParquetSchemaElement
{
    // Member order is kept as declared because reflection-based serializers may depend on it.
    // Interface members are implicitly public, so the modifier is omitted throughout.

    string Path { get; }

    ICollection<IParquetSchemaElement> Children { get; }

    /// <summary>CLR type of the element; excluded from JSON serialization.</summary>
    [JsonIgnore]
    Type ClrType { get; }

    FieldTypeId FieldType { get; }

    RepetitionTypeId? RepetitionType { get; }

    bool IsPrimitive { get; }

    string? Type { get; }

    int? TypeLength { get; }

    int? NumChildren { get; }

    string? ConvertedType { get; }

    int? Scale { get; }

    int? Precision { get; }

    object? LogicalType { get; }

    /// <summary>
    /// Case-insensitive counterpart of <see cref="GetChild(string)"/>.
    /// It exists only to cope with non-standard Parquet implementations.
    /// </summary>
    IParquetSchemaElement GetChildCI(string name);

    IParquetSchemaElement GetChild(string name);

    IParquetSchemaElement GetListField();

    IParquetSchemaElement GetListItemField();

    IParquetSchemaElement GetSingleOrByName(string name);

    IParquetSchemaElement GetMapKeyValueField();

    IParquetSchemaElement GetMapKeyField();

    IParquetSchemaElement GetMapValueField();
}
|
||||
}
|
||||
|
|
@ -1,219 +0,0 @@
|
|||
using Parquet;
|
||||
using Parquet.Meta;
|
||||
using Parquet.Schema;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
/// <summary>
/// Wraps one or more <see cref="ParquetReader"/> instances (a single file, or every file of a
/// folder that shares one schema) and exposes their combined metadata.
/// Instances are created through <see cref="OpenFileOrFolderAsync"/>, <see cref="OpenFileAsync"/>
/// or <see cref="OpenFolderAsync"/>; disposing the engine disposes every reader.
/// </summary>
public partial class ParquetEngine : IDisposable
{
    private readonly ParquetReader[] _parquetFiles;
    private long? _recordCount;
    private ParquetSchemaElement? _parquetSchemaTree;

    /// <summary>Total number of rows across all readers; computed on first access and cached.</summary>
    public long RecordCount => _recordCount ??= _parquetFiles.Sum(pf => pf.Metadata?.NumRows ?? 0);

    /// <summary>Number of underlying parquet readers.</summary>
    public int NumberOfPartitions => _parquetFiles.Length;

    // The first reader stands in for the whole set when schema or metadata is requested.
    private ParquetReader DefaultReader => _parquetFiles.FirstOrDefault() ?? throw new ParquetEngineException("No parquet readers available");

    /// <summary>Names of the schema fields of the first reader (a new list on every access).</summary>
    public List<string> Fields => DefaultReader.Schema.Fields.Select(f => f.Name).ToList();

    /// <summary>Thrift metadata of the first reader.</summary>
    /// <exception cref="ParquetEngineException">The reader has no metadata.</exception>
    public FileMetaData ThriftMetadata => DefaultReader.Metadata ?? throw new ParquetEngineException("No thrift metadata was found");

    /// <summary>Custom metadata of the first reader.</summary>
    public Dictionary<string, string> CustomMetadata => DefaultReader.CustomMetadata;

    /// <summary>Schema of the first reader.</summary>
    public ParquetSchema Schema => DefaultReader.Schema;

    /// <summary>Schema tree built from the thrift metadata; built on first access and cached.</summary>
    public ParquetSchemaElement ParquetSchemaTree => _parquetSchemaTree ??= BuildParquetSchemaTree();

    /// <summary>The file or folder path this engine was opened with.</summary>
    public string OpenFileOrFolderPath { get; }

    private ParquetEngine(string fileOrFolderPath, params ParquetReader[] parquetFiles)
    {
        _parquetFiles = parquetFiles ?? throw new ArgumentNullException(nameof(parquetFiles), "No parquet readers provided");
        OpenFileOrFolderPath = fileOrFolderPath;
    }

    // Builds the schema tree from the flat thrift schema list, then attaches each data field
    // of the reader schema to the tree node found by following the field's path.
    private ParquetSchemaElement BuildParquetSchemaTree()
    {
        var thriftSchema = ThriftMetadata.Schema ?? throw new ParquetException("No thrift metadata was found");
        var schemaElements = thriftSchema.GetEnumerator();
        var thriftSchemaTree = ReadSchemaTree(ref schemaElements);

        foreach (var dataField in Schema.GetDataFields())
        {
            var field = thriftSchemaTree.GetChild(dataField.Path.FirstPart ?? throw new MalformedFieldException($"Field has no schema path: `{dataField.Name}`"));
            for (var i = 1; i < dataField.Path.Length; i++)
            {
                field = field.GetChild(dataField.Path[i]);
            }

            field.DataField = dataField; //if it doesn't have a child it's a datafield (I hope)
        }

        return thriftSchemaTree;
    }

    // Consumes the next schema element and, recursively, as many following elements as it
    // declares children. The enumerator is a struct and is passed by ref so that the
    // recursion advances one shared cursor.
    private static ParquetSchemaElement ReadSchemaTree(ref List<SchemaElement>.Enumerator schemaElements)
    {
        if (!schemaElements.MoveNext())
        {
            throw new ParquetException("Invalid parquet schema");
        }

        var current = schemaElements.Current;
        var parquetSchemaElement = new ParquetSchemaElement(current);
        for (int i = 0; i < current.NumChildren; i++)
        {
            parquetSchemaElement.AddChild(ReadSchemaTree(ref schemaElements));
        }

        return parquetSchemaElement;
    }

    /// <summary>Opens <paramref name="fileOrFolderPath"/> as a file if it is one, otherwise as a folder.</summary>
    /// <exception cref="FileNotFoundException">The path is neither an existing file nor an existing folder.</exception>
    public static async Task<ParquetEngine> OpenFileOrFolderAsync(string fileOrFolderPath, CancellationToken cancellationToken)
    {
        // 'async' so that every failure, including the not-found case, is reported through
        // the returned task instead of being thrown synchronously.
        if (File.Exists(fileOrFolderPath)) //Handles null
        {
            return await OpenFileAsync(fileOrFolderPath, cancellationToken);
        }

        if (Directory.Exists(fileOrFolderPath)) //Handles null
        {
            return await OpenFolderAsync(fileOrFolderPath, cancellationToken);
        }

        throw new FileNotFoundException($"Could not find file or folder at location: {fileOrFolderPath}");
    }

    /// <summary>Opens a single parquet file.</summary>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    /// <exception cref="FileReadException">Creating the reader failed; the cause is wrapped.</exception>
    public static async Task<ParquetEngine> OpenFileAsync(string parquetFilePath, CancellationToken cancellationToken)
    {
        if (!File.Exists(parquetFilePath)) //Handles null
        {
            throw new FileNotFoundException($"Could not find parquet file at: {parquetFilePath}");
        }

        try
        {
            var parquetReader = await ParquetReader.CreateAsync(parquetFilePath, null, cancellationToken);
            return new ParquetEngine(parquetFilePath, parquetReader);
        }
        catch (Exception ex)
        {
            throw new FileReadException(ex);
        }
    }

    /// <summary>
    /// Opens every candidate parquet file under <paramref name="folderPath"/> as one engine.
    /// All files must be readable and share a single schema.
    /// </summary>
    /// <exception cref="DirectoryNotFoundException">The folder does not exist.</exception>
    /// <exception cref="FileNotFoundException">The folder contains no files.</exception>
    /// <exception cref="AllFilesSkippedException">No file could be opened.</exception>
    /// <exception cref="MultipleSchemasFoundException">The files do not share one schema.</exception>
    /// <exception cref="SomeFilesSkippedException">Some, but not all, files could not be opened.</exception>
    /// <exception cref="OperationCanceledException">Cancellation was requested.</exception>
    public static async Task<ParquetEngine> OpenFolderAsync(string folderPath, CancellationToken cancellationToken)
    {
        if (!Directory.Exists(folderPath)) //Handles null
        {
            throw new DirectoryNotFoundException($"Directory doesn't exist: {folderPath}");
        }

        var skippedFiles = new Dictionary<string, Exception>();
        var fileGroups = new Dictionary<ParquetSchema, List<ParquetReader>>();
        try
        {
            foreach (var file in ListParquetFiles(folderPath))
            {
                cancellationToken.ThrowIfCancellationRequested();

                try
                {
                    var parquetReader = await ParquetReader.CreateAsync(file, null, cancellationToken);
                    if (!fileGroups.TryGetValue(parquetReader.Schema, out var readersForSchema))
                    {
                        readersForSchema = new List<ParquetReader>();
                        fileGroups.Add(parquetReader.Schema, readersForSchema);
                    }

                    readersForSchema.Add(parquetReader);
                }
                catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
                {
                    // A cancelled open is not an unreadable file; let the cancellation propagate.
                    throw;
                }
                catch (Exception ex)
                {
                    skippedFiles.Add(Path.GetRelativePath(folderPath, file), ex);
                }
            }

            if (fileGroups.Count == 0)
            {
                if (skippedFiles.Count == 0)
                {
                    throw new FileNotFoundException("Directory is empty");
                }

                throw new AllFilesSkippedException(skippedFiles);
            }

            if (fileGroups.Count > 1)
            {
                //We found more than one type of schema.
                throw new MultipleSchemasFoundException(fileGroups.Keys.ToList());
            }

            if (skippedFiles.Count > 0)
            {
                //We found one schema but some files couldn't be read
                throw new SomeFilesSkippedException(skippedFiles);
            }

            cancellationToken.ThrowIfCancellationRequested();

            return new ParquetEngine(folderPath, fileGroups.Values.First().ToArray());
        }
        catch
        {
            // Whatever went wrong (cancellation included), no opened reader may outlive this call.
            foreach (var readers in fileGroups.Values)
            {
                EZDispose(readers);
            }

            throw;
        }
    }

    // Yields the readers that contain rows at or after the given global row offset, each
    // paired with the offset still to be skipped inside that reader (0 for all but the first).
    private IEnumerable<(long RemainingOffset, ParquetReader ParquetReader)> GetReaders(long offset)
    {
        foreach (var parquetFile in _parquetFiles)
        {
            if (parquetFile.Metadata is { } metadata && offset >= metadata.NumRows)
            {
                // The requested offset lies entirely past this reader.
                offset -= metadata.NumRows;
                continue;
            }

            yield return (offset, parquetFile);
            offset = 0;
        }
    }

    // Lists files ending in .parquet, .parquet.gzip or .parquet.gz under the folder and its
    // sub-folders, sorted by path. When none match, every file is returned instead.
    private static IEnumerable<string> ListParquetFiles(string folderPath)
    {
        // Walk the directory tree exactly once; Any() followed by OrderBy() walked it twice.
        var allFiles = Directory.EnumerateFiles(folderPath, "*", SearchOption.AllDirectories).ToList();
        var parquetFiles = allFiles.Where(HasParquetExtension).ToList();

        //Check for extensionless files
        var candidates = parquetFiles.Count > 0 ? parquetFiles : allFiles;
        return candidates.OrderBy(filename => filename);

        // Ordinal, case-insensitive: extensions are not linguistic text and ".PARQUET" is valid.
        static bool HasParquetExtension(string file) =>
            file.EndsWith(".parquet", StringComparison.OrdinalIgnoreCase) ||
            file.EndsWith(".parquet.gzip", StringComparison.OrdinalIgnoreCase) ||
            file.EndsWith(".parquet.gz", StringComparison.OrdinalIgnoreCase);
    }

    // Best-effort disposal: null sequences and items are ignored and a failing Dispose does
    // not prevent the remaining items from being disposed.
    private static void EZDispose(IEnumerable<IDisposable> disposables)
    {
        if (disposables is null)
        {
            return;
        }

        foreach (var disposable in disposables)
        {
            try
            {
                disposable?.Dispose();
            }
            catch { /* Swallow */ }
        }
    }

    /// <summary>Disposes every underlying parquet reader.</summary>
    public void Dispose() => EZDispose(_parquetFiles);
}
|
||||
}
|
||||
|
|
@ -9,8 +9,10 @@ namespace ParquetViewer.Engine
|
|||
/// By default Parquet Engine will render Dates using the system culture's format.
|
||||
/// By setting this value a custom date format can be used instead.
|
||||
/// </summary>
|
||||
/// <remarks>Parquet Engine renders dates when converting <see cref="ListValue"/>,
|
||||
/// <see cref="StructValue"/>, and <see cref="MapValue"/> types to string.</remarks>
|
||||
/// <remarks>Parquet Engine renders dates when converting <see cref="IListValue"/>,
|
||||
/// <see cref="IStructValue"/>, and <see cref="IMapValue"/> types to string.</remarks>
|
||||
public static string? DateDisplayFormat { get; set; }
|
||||
public static string? DateOnlyDisplayFormat { get; set; }
|
||||
public static string? TimeOnlyDisplayFormat { get; set; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,12 +1,9 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net8.0</TargetFramework>
|
||||
<TargetFramework>net10.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<PlatformTarget>x64</PlatformTarget>
|
||||
<Configurations>Debug;Release;Release_SelfContained</Configurations>
|
||||
<ProduceReferenceAssembly>False</ProduceReferenceAssembly>
|
||||
<EnforceCodeStyleInBuild>True</EnforceCodeStyleInBuild>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
</PropertyGroup>
|
||||
|
|
@ -16,7 +13,4 @@
|
|||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release_SelfContained|AnyCPU'">
|
||||
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Parquet.Net" />
|
||||
</ItemGroup>
|
||||
</Project>
|
||||
|
|
@ -1,41 +1,39 @@
|
|||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using static ParquetViewer.Engine.Types.IByteArrayValue;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public class ByteArrayValue : IComparable<ByteArrayValue>, IComparable
|
||||
public class ByteArrayValue : IByteArrayValue
|
||||
{
|
||||
public string Name { get; }
|
||||
public byte[] Data { get; }
|
||||
|
||||
|
||||
private DisplayFormat[]? _possibleDisplayFormats;
|
||||
public DisplayFormat[] PossibleDisplayFormats =>
|
||||
_possibleDisplayFormats ??= this.CalculatePossibleDisplayFormats();
|
||||
_possibleDisplayFormats ??= CalculatePossibleDisplayFormats();
|
||||
|
||||
public ByteArrayValue(string name, byte[] data)
|
||||
public ByteArrayValue(byte[] data)
|
||||
{
|
||||
this.Name = name;
|
||||
this.Data = data;
|
||||
Data = data;
|
||||
}
|
||||
|
||||
public override string ToString() => BitConverter.ToString(this.Data);
|
||||
public override string ToString() => BitConverter.ToString(Data);
|
||||
|
||||
public int CompareTo(ByteArrayValue? other)
|
||||
public int CompareTo(IByteArrayValue? other)
|
||||
{
|
||||
if (other?.Data is null)
|
||||
return 1;
|
||||
else if (this.Data is null)
|
||||
else if (Data is null)
|
||||
return -1;
|
||||
else
|
||||
return ByteArraysEqual(this.Data, other.Data);
|
||||
return Helpers.ByteArraysEqual(Data, other.Data);
|
||||
}
|
||||
|
||||
private static int ByteArraysEqual(ReadOnlySpan<byte> a1, ReadOnlySpan<byte> a2) => a1.SequenceCompareTo(a2);
|
||||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is ByteArrayValue byteArray)
|
||||
if (obj is IByteArrayValue byteArray)
|
||||
return CompareTo(byteArray);
|
||||
else
|
||||
return 1;
|
||||
|
|
@ -84,22 +82,6 @@ namespace ParquetViewer.Engine.Types
|
|||
return possibleDisplayFormats.ToArray();
|
||||
}
|
||||
|
||||
public enum DisplayFormat
|
||||
{
|
||||
Hex = 0, //Default hexadecimal format
|
||||
IPv6, // 16 bytes
|
||||
IPv4, // 4 bytes
|
||||
Guid, // 16 bytes
|
||||
Short, // 2 bytes
|
||||
Integer, // 4 bytes
|
||||
Long, // 8 bytes
|
||||
Float, // 4 bytes
|
||||
Double, // 8 bytes
|
||||
ASCII, // ASCII text if printable (any size)
|
||||
Base64, // Base64 encoded string (any size)
|
||||
Size // Size information (any size)
|
||||
}
|
||||
|
||||
#region Type Conversions
|
||||
public bool ToIPv6([NotNullWhen(true)] out IPAddress? ipAddress)
|
||||
{
|
||||
|
|
@ -165,7 +147,7 @@ namespace ParquetViewer.Engine.Types
|
|||
if (Data.Length == 0)
|
||||
return false;
|
||||
|
||||
var printableCount = this.Data.Sum(@byte =>
|
||||
var printableCount = Data.Sum(@byte =>
|
||||
@byte >= ' ' /*32*/ && @byte <= '~' /*126*/ //Printable ASCII range
|
||||
? 1 : 0);
|
||||
|
||||
|
|
@ -305,4 +287,4 @@ namespace ParquetViewer.Engine.Types
|
|||
+ BitConverter.ToString(Data, Data.Length - (maxBytesToRender / 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
45
src/ParquetViewer.Engine/Types/IByteArrayValue.cs
Normal file
45
src/ParquetViewer.Engine/Types/IByteArrayValue.cs
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Linq;
|
||||
using System.Net;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
/// <summary>
/// A comparable byte-array value together with the display formats it can be rendered in
/// and try-style conversions to common types.
/// </summary>
public interface IByteArrayValue : IComparable<IByteArrayValue>, IComparable
{
    /// <summary>The raw bytes.</summary>
    byte[] Data { get; }

    /// <summary>Display formats applicable to <see cref="Data"/>.</summary>
    DisplayFormat[] PossibleDisplayFormats { get; }

    /// <summary>Ways a byte array can be displayed.</summary>
    public enum DisplayFormat
    {
        Hex = 0, // Default hexadecimal format
        IPv6,    // 16 bytes
        IPv4,    // 4 bytes
        Guid,    // 16 bytes
        Short,   // 2 bytes
        Integer, // 4 bytes
        Long,    // 8 bytes
        Float,   // 4 bytes
        Double,  // 8 bytes
        ASCII,   // ASCII text if printable (any size)
        Base64,  // Base64 encoded string (any size)
        Size     // Size information (any size)
    }

    /// <summary>Renders the value as a string shortened towards <paramref name="desiredLength"/>.</summary>
    string ToStringTruncated(int desiredLength);

    // Try-style conversions: the out value is non-null whenever the method returns true.

    bool ToIPv6([NotNullWhen(true)] out IPAddress? ipAddress);

    bool ToIPv4([NotNullWhen(true)] out IPAddress? ipAddress);

    bool ToGuid([NotNullWhen(true)] out Guid? guid);

    bool ToASCII([NotNullWhen(true)] out string? ascii);

    bool ToShort([NotNullWhen(true)] out short? @short);

    bool ToInteger([NotNullWhen(true)] out int? @int);

    bool ToLong([NotNullWhen(true)] out long? @long);

    bool ToFloat([NotNullWhen(true)] out float? @float);

    bool ToDouble([NotNullWhen(true)] out double? @double);

    /// <summary>Produces the Base64 form; unlike the other conversions it has no failure result.</summary>
    void ToBase64(out string base64);
}
|
||||
}
|
||||
15
src/ParquetViewer.Engine/Types/IListValue.cs
Normal file
15
src/ParquetViewer.Engine/Types/IListValue.cs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
/// <summary>A comparable, enumerable list value.</summary>
public interface IListValue : IComparable<IListValue>, IComparable, IEnumerable<object>
{
    // Interface members are implicitly public, so the modifier is omitted here.

    /// <summary>The underlying list.</summary>
    IList Data { get; }

    /// <summary>Type associated with the list (presumably the item type; confirm against implementations).</summary>
    Type Type { get; }
}
|
||||
}
|
||||
14
src/ParquetViewer.Engine/Types/IMapValue.cs
Normal file
14
src/ParquetViewer.Engine/Types/IMapValue.cs
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
using System.Collections;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
/// <summary>A comparable map value that enumerates as (Key, Value) pairs.</summary>
public interface IMapValue : IComparable<IMapValue>, IComparable, IEnumerable<(object Key, object Value)>
{
    // Interface members are implicitly public, so the modifier is omitted here.

    /// <summary>The map keys.</summary>
    ArrayList Keys { get; }

    /// <summary>Type associated with the keys.</summary>
    Type KeyType { get; }

    /// <summary>The map values.</summary>
    ArrayList Values { get; }

    /// <summary>Type associated with the values.</summary>
    Type ValueType { get; }

    /// <summary>Returns the key/value pair at <paramref name="index"/>.</summary>
    (object Key, object Value) GetMapValue(int index);

    /// <summary>Length of the map (presumably the number of pairs; confirm against implementations).</summary>
    int Length { get; }
}
|
||||
}
|
||||
17
src/ParquetViewer.Engine/Types/IStructValue.cs
Normal file
17
src/ParquetViewer.Engine/Types/IStructValue.cs
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
/// <summary>
/// Contract for a struct-typed cell value: a single row of named fields that can be
/// compared, rendered as text/JSON, or expanded into a <see cref="DataTable"/>.
/// </summary>
public interface IStructValue : IComparable, IComparable<IStructValue>
{
    /// <summary>The row holding this struct's field values.</summary>
    IDataRowLite Data { get; }

    /// <summary>Names of the struct's fields.</summary>
    IReadOnlyCollection<string> FieldNames { get; }

    /// <summary>Expands the struct into a <see cref="DataTable"/>.</summary>
    DataTable ToDataTable();

    /// <summary>Serializes the struct to JSON text.</summary>
    /// <param name="success">Reports whether serialization succeeded.</param>
    /// <param name="desiredLength">Optional length hint used to shorten the output.</param>
    string ToJSON(out bool success, int? desiredLength = null);

    /// <summary>Returns a text rendering shortened towards <paramref name="desiredLength"/>.</summary>
    /// <param name="desiredLength">Length hint for the shortened rendering.</param>
    string ToStringTruncated(int desiredLength);
}
|
||||
}
|
||||
|
|
@ -1,19 +1,13 @@
|
|||
using System.Collections;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public class ListValue : IComparable<ListValue>, IComparable, IEnumerable<object>
|
||||
public class ListValue : IListValue
|
||||
{
|
||||
public IList Data { get; }
|
||||
public Type? Type { get; private set; }
|
||||
|
||||
public int Length => Data.Count;
|
||||
|
||||
public ListValue(Array data)
|
||||
{
|
||||
Data = data ?? throw new ArgumentNullException(nameof(data));
|
||||
Type = Data.GetType().GetElementType();
|
||||
}
|
||||
public Type Type { get; }
|
||||
|
||||
public ListValue(ArrayList data, Type type)
|
||||
{
|
||||
|
|
@ -35,14 +29,14 @@ namespace ParquetViewer.Engine.Types
|
|||
using var ms = new MemoryStream();
|
||||
using (var jsonWriter = new Utf8JsonWriterWithRunningLength(ms))
|
||||
{
|
||||
StructValue.WriteValue(jsonWriter, this, false);
|
||||
Helpers.WriteValue(jsonWriter, this, false);
|
||||
}
|
||||
ms.Position = 0;
|
||||
using var sr = new StreamReader(ms);
|
||||
return sr.ReadToEnd();
|
||||
}
|
||||
|
||||
public int CompareTo(ListValue? other)
|
||||
public int CompareTo(IListValue? other)
|
||||
{
|
||||
if (other is null)
|
||||
return 1;
|
||||
|
|
@ -72,7 +66,7 @@ namespace ParquetViewer.Engine.Types
|
|||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is ListValue list)
|
||||
if (obj is IListValue list)
|
||||
return CompareTo(list);
|
||||
else
|
||||
return 1;
|
||||
|
|
@ -88,4 +82,4 @@ namespace ParquetViewer.Engine.Types
|
|||
|
||||
// Explicit non-generic IEnumerable implementation; forwards to this type's GetEnumerator().
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,15 +1,21 @@
|
|||
using System.Collections;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Collections;
|
||||
using System.Text;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
namespace ParquetViewer.Engine
|
||||
{
|
||||
public class MapValue : IComparable<MapValue>, IComparable, IEnumerable<(object Key, object Value)>
|
||||
public class MapValue : IMapValue
|
||||
{
|
||||
public ArrayList Keys { get; }
|
||||
|
||||
public Type KeyType { get; }
|
||||
|
||||
public ArrayList Values { get; }
|
||||
|
||||
public Type ValueType { get; }
|
||||
|
||||
public int Length => Math.Max(Keys.Count, Values.Count);
|
||||
|
||||
public MapValue(ArrayList keys, Type keyType, ArrayList values, Type valueType)
|
||||
{
|
||||
if (keys is null)
|
||||
|
|
@ -36,49 +42,10 @@ namespace ParquetViewer.Engine.Types
|
|||
ValueType = valueType;
|
||||
}
|
||||
|
||||
public int Length => Keys.Count;
|
||||
|
||||
public override string ToString()
|
||||
{
|
||||
var mapValuesStringBuilder = new StringBuilder("[");
|
||||
for (var i = 0; i < Length; i++)
|
||||
{
|
||||
if (i != 0)
|
||||
{
|
||||
mapValuesStringBuilder.Append(',');
|
||||
}
|
||||
|
||||
mapValuesStringBuilder.Append(FormatString(GetMapValue(i)));
|
||||
}
|
||||
|
||||
mapValuesStringBuilder.Append(']');
|
||||
return mapValuesStringBuilder.ToString();
|
||||
|
||||
static string FormatString((object Key, object Value) map)
|
||||
{
|
||||
string key;
|
||||
if (map.Key is DateTime dt && ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
key = dt.ToString(ParquetEngineSettings.DateDisplayFormat);
|
||||
else
|
||||
key = map.Key?.ToString() ?? string.Empty;
|
||||
|
||||
string value;
|
||||
if (map.Value is DateTime dt2 && ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
value = dt2.ToString(ParquetEngineSettings.DateDisplayFormat);
|
||||
else
|
||||
value = map.Value?.ToString() ?? string.Empty;
|
||||
|
||||
return $"({key},{value})";
|
||||
}
|
||||
}
|
||||
|
||||
private (object Key, object Value) GetMapValue(int index)
|
||||
=> (Keys[index] ?? DBNull.Value, Values[index] ?? DBNull.Value);
|
||||
|
||||
/// <summary>
|
||||
/// Sorts by Key first, then Value.
|
||||
/// </summary>
|
||||
public int CompareTo(MapValue? other)
|
||||
public int CompareTo(IMapValue? other)
|
||||
{
|
||||
if (other is null)
|
||||
return 1;
|
||||
|
|
@ -113,7 +80,7 @@ namespace ParquetViewer.Engine.Types
|
|||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is MapValue mapValue)
|
||||
if (obj is IMapValue mapValue)
|
||||
return CompareTo(mapValue);
|
||||
else
|
||||
return 1;
|
||||
|
|
@ -127,6 +94,51 @@ namespace ParquetViewer.Engine.Types
|
|||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Returns the key/value pair stored at <paramref name="index"/>,
/// substituting <see cref="DBNull.Value"/> for a null key or a null value.
/// </summary>
/// <param name="index">Zero-based position into <c>Keys</c> and <c>Values</c>.</param>
public (object Key, object Value) GetMapValue(int index)
{
    object entryKey = Keys[index] ?? DBNull.Value;
    object entryValue = Values[index] ?? DBNull.Value;
    return (entryKey, entryValue);
}
|
||||
|
||||
// Explicit non-generic IEnumerable implementation; forwards to this type's GetEnumerator().
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
|
||||
|
||||
/// <summary>
/// Renders the map as <c>[(key,value),(key,value),...]</c>.
/// DateTime, DateOnly and TimeOnly entries use the matching display format from
/// <c>ParquetEngineSettings</c> when one is configured; everything else uses
/// <c>ToString()</c> (a null result becomes the empty string).
/// </summary>
public override string ToString()
{
    var rendered = new StringBuilder("[");
    for (var index = 0; index < Length; index++)
    {
        if (index > 0)
        {
            rendered.Append(',');
        }

        (object entryKey, object entryValue) = GetMapValue(index);
        string keyText = Render(entryKey);
        string valueText = Render(entryValue);
        rendered.Append($"({keyText},{valueText})");
    }

    return rendered.Append(']').ToString();

    // Shared formatting for both halves of an entry.
    static string Render(object item) => item switch
    {
        DateTime dateTime when ParquetEngineSettings.DateDisplayFormat is not null
            => dateTime.ToString(ParquetEngineSettings.DateDisplayFormat),
        DateOnly dateOnly when ParquetEngineSettings.DateOnlyDisplayFormat is not null
            => dateOnly.ToString(ParquetEngineSettings.DateOnlyDisplayFormat),
        TimeOnly timeOnly when ParquetEngineSettings.TimeOnlyDisplayFormat is not null
            => timeOnly.ToString(ParquetEngineSettings.TimeOnlyDisplayFormat),
        _ => item?.ToString() ?? string.Empty,
    };
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,211 +0,0 @@
|
|||
using System.Collections.Immutable;
|
||||
using System.Data;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
public class StructValue : IComparable<StructValue>, IComparable
|
||||
{
|
||||
public string Name { get; }
|
||||
|
||||
internal DataRowLite Data { get; }
|
||||
|
||||
internal bool IsList { get; set; }
|
||||
|
||||
//TODO: Add a public constructor?
|
||||
internal StructValue(string name, DataRowLite data)
|
||||
{
|
||||
Name = name ?? throw new ArgumentNullException(nameof(name));
|
||||
Data = data ?? throw new ArgumentNullException(nameof(data));
|
||||
}
|
||||
|
||||
public override string ToString() => ToJSON(out _);
|
||||
|
||||
public string ToStringTruncated(int desiredLength) => ToJSON(out _, desiredLength);
|
||||
|
||||
private string ToJSON(out bool success, int? desiredLength = null)
|
||||
{
|
||||
try
|
||||
{
|
||||
bool isTruncated = false;
|
||||
using var ms = new MemoryStream();
|
||||
using (var jsonWriter = new Utf8JsonWriterWithRunningLength(ms))
|
||||
{
|
||||
jsonWriter.WriteStartObject();
|
||||
for (var i = 0; i < this.Data.Columns.Count; i++)
|
||||
{
|
||||
string columnName = this.Data.Columns.Values.ElementAt(i).Name
|
||||
//Remove the parent field name from columns when rendering the data as json in the gridview cell.
|
||||
.Replace($"{this.Name}/", string.Empty);
|
||||
jsonWriter.WritePropertyName(columnName);
|
||||
|
||||
object value = this.Data.Row[i];
|
||||
WriteValue(jsonWriter, value, desiredLength is not null);
|
||||
|
||||
if (desiredLength > 0 && jsonWriter.ApproximateStringLengthSoFar > desiredLength)
|
||||
{
|
||||
isTruncated = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!isTruncated)
|
||||
jsonWriter.WriteEndObject();
|
||||
}
|
||||
|
||||
ms.Position = 0;
|
||||
using var reader = new StreamReader(ms);
|
||||
var json = reader.ReadToEnd();
|
||||
if (isTruncated)
|
||||
{
|
||||
json += "[...]";
|
||||
}
|
||||
success = true;
|
||||
return json;
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
success = false;
|
||||
return $"Error while serializing Struct field '{Name}': {Environment.NewLine}{Environment.NewLine}{ex}";
|
||||
}
|
||||
}
|
||||
|
||||
public DataTable ToDataTable() => this.Data.ToDataTable();
|
||||
|
||||
public static void WriteValue(Utf8JsonWriterWithRunningLength jsonWriter, object value, bool truncateForDisplay)
|
||||
{
|
||||
if (value is null)
|
||||
{
|
||||
//Value should never be null as we should be replacing all those with DBNull.Value
|
||||
throw new ArgumentNullException(nameof(value));
|
||||
}
|
||||
else if (value == DBNull.Value)
|
||||
{
|
||||
jsonWriter.WriteNullValue();
|
||||
}
|
||||
else if (value is string str)
|
||||
{
|
||||
jsonWriter.WriteStringValue(str);
|
||||
}
|
||||
else if (value is bool @bool)
|
||||
{
|
||||
jsonWriter.WriteBooleanValue(@bool);
|
||||
}
|
||||
else if (value.GetType().IsNumber())
|
||||
{
|
||||
jsonWriter.WriteNumberValue(Convert.ToDecimal(value));
|
||||
}
|
||||
else if (value is StructValue @struct)
|
||||
{
|
||||
var json = @struct.ToJSON(out var success);
|
||||
if (success)
|
||||
jsonWriter.WriteRawValue(json);
|
||||
else
|
||||
jsonWriter.WriteStringValue(json);
|
||||
}
|
||||
else if (value is MapValue map)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach ((object mapKey, object mapValue) in map)
|
||||
{
|
||||
jsonWriter.WriteStartObject();
|
||||
jsonWriter.WritePropertyName("key");
|
||||
WriteValue(jsonWriter, mapKey, truncateForDisplay);
|
||||
jsonWriter.WritePropertyName("value");
|
||||
WriteValue(jsonWriter, mapValue, truncateForDisplay);
|
||||
jsonWriter.WriteEndObject();
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is ListValue list)
|
||||
{
|
||||
jsonWriter.WriteStartArray();
|
||||
foreach (var item in list)
|
||||
{
|
||||
WriteValue(jsonWriter, item, truncateForDisplay);
|
||||
}
|
||||
jsonWriter.WriteEndArray();
|
||||
}
|
||||
else if (value is ByteArrayValue byteArray /*&& truncateForDisplay //should use the entire byte array if
|
||||
* we're not truncating for display? Seems kind of unreasonable
|
||||
* for users to rely on binary data within a Struct value preview.*/)
|
||||
{
|
||||
const int byteArrayMaxStringLength = 24; //arbitrary number that I think looks good
|
||||
var byteArrayAsString = byteArray.ToStringTruncated(byteArrayMaxStringLength);
|
||||
jsonWriter.WriteStringValue(byteArrayAsString);
|
||||
}
|
||||
else if (value is DateTime dt)
|
||||
{
|
||||
//Write dates as string
|
||||
if (ParquetEngineSettings.DateDisplayFormat is not null)
|
||||
jsonWriter.WriteStringValue(dt.ToString(ParquetEngineSettings.DateDisplayFormat));
|
||||
else
|
||||
jsonWriter.WriteStringValue(dt.ToString());
|
||||
}
|
||||
else
|
||||
{
|
||||
//Everything else just try to write it as string
|
||||
jsonWriter.WriteStringValue(value.ToString()!);
|
||||
}
|
||||
}
|
||||
|
||||
private IReadOnlyCollection<string> FieldNames => Data.Columns.Keys;
|
||||
|
||||
/// <summary>
|
||||
/// Sorts by field names first, then by values
|
||||
/// </summary>
|
||||
public int CompareTo(StructValue? other)
|
||||
{
|
||||
if (other?.Data is null || other.FieldNames.Count == 0)
|
||||
return 1;
|
||||
|
||||
if (Data is null || FieldNames.Count == 0)
|
||||
return -1;
|
||||
|
||||
var otherColumnNames = string.Join("|", other.FieldNames);
|
||||
var columnNames = string.Join("|", this.FieldNames);
|
||||
|
||||
int schemaComparison = columnNames.CompareTo(otherColumnNames);
|
||||
if (schemaComparison != 0)
|
||||
return schemaComparison;
|
||||
|
||||
int fieldCount = FieldNames.Count;
|
||||
for (var i = 0; i < fieldCount; i++)
|
||||
{
|
||||
var otherValue = other.Data.Row[i];
|
||||
var value = Data.Row[i];
|
||||
int comparison = Helpers.CompareTo(value, otherValue);
|
||||
if (comparison != 0)
|
||||
return comparison;
|
||||
}
|
||||
|
||||
return 0; //Both structs appear equal
|
||||
}
|
||||
|
||||
public int CompareTo(object? obj)
|
||||
{
|
||||
if (obj is StructValue @struct)
|
||||
return CompareTo(@struct);
|
||||
else
|
||||
return 1;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// https://huggingface.co/docs/hub/en/datasets-image#parquet-format
|
||||
/// </summary>
|
||||
/// <returns>True if this is a struct named "image" with "bytes" and "path" fields</returns>
|
||||
public bool IsHuggingFaceImageFormat([NotNullWhen(true)] out byte[]? data)
|
||||
{
|
||||
if (this.Name == "image" //Should we allow other names?
|
||||
&& FieldNames.Count == 2
|
||||
&& FieldNames.Contains("bytes")
|
||||
&& FieldNames.Contains("path")
|
||||
&& this.Data.GetValue("bytes") is ByteArrayValue byteArrayValue)
|
||||
{
|
||||
data = byteArrayValue.Data;
|
||||
return true;
|
||||
}
|
||||
data = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
119
src/ParquetViewer.Engine/Types/StructValueBase.cs
Normal file
119
src/ParquetViewer.Engine/Types/StructValueBase.cs
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
using System.Data;
|
||||
|
||||
namespace ParquetViewer.Engine.Types
|
||||
{
|
||||
/// <summary>
/// A struct cell value backed by a single <see cref="IDataRowLite"/>: one row of named fields.
/// Supports ordering, JSON rendering (optionally truncated) and expansion to a <see cref="DataTable"/>.
/// </summary>
public class StructValue : IStructValue
{
    /// <summary>The row holding this struct's field values.</summary>
    public IDataRowLite Data { get; }

    /// <summary>Names of the struct's fields, taken from <see cref="Data"/>.</summary>
    public IReadOnlyCollection<string> FieldNames => Data.ColumnNames;

    /// <summary>Wraps <paramref name="data"/> as a struct value.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public StructValue(IDataRowLite data)
    {
        Data = data ?? throw new ArgumentNullException(nameof(data));
    }

    /// <summary>
    /// Sorts by field names first, then by values.
    /// A null <paramref name="other"/> sorts before this instance; a struct without fields sorts
    /// before one with fields; two field-less structs compare as equal.
    /// </summary>
    public int CompareTo(IStructValue? other)
    {
        if (other is null)
            return 1;

        bool otherIsEmpty = other.Data is null || other.FieldNames.Count == 0;
        bool thisIsEmpty = Data is null || FieldNames.Count == 0;
        if (thisIsEmpty || otherIsEmpty)
        {
            // Both empty must yield 0, otherwise x.CompareTo(x) != 0 and
            // a.CompareTo(b) / b.CompareTo(a) would both claim "greater".
            if (thisIsEmpty && otherIsEmpty)
                return 0;

            return thisIsEmpty ? -1 : 1;
        }

        var otherColumnNames = string.Join("|", other.FieldNames);
        var columnNames = string.Join("|", FieldNames);

        // Ordinal comparison keeps the ordering independent of the current culture.
        int schemaComparison = string.CompareOrdinal(columnNames, otherColumnNames);
        if (schemaComparison != 0)
            return schemaComparison;

        // Same field names from here on, so both rows can be walked by the same index.
        int fieldCount = FieldNames.Count;
        for (var i = 0; i < fieldCount; i++)
        {
            var otherValue = other.Data!.Row[i];
            var value = Data!.Row[i];
            int comparison = Helpers.CompareTo(value, otherValue);
            if (comparison != 0)
                return comparison;
        }

        return 0; //Both structs appear equal
    }

    /// <summary>
    /// Non-generic comparison: delegates to <see cref="CompareTo(IStructValue?)"/> for struct
    /// values and treats anything else as sorting before this instance.
    /// </summary>
    public int CompareTo(object? obj)
    {
        if (obj is IStructValue @struct)
            return CompareTo(@struct);
        else
            return 1;
    }

    /// <summary>
    /// Builds a one-row <see cref="DataTable"/> with one column per field. Each column is typed
    /// after the field's runtime value, falling back to <see cref="object"/> for missing values.
    /// </summary>
    public DataTable ToDataTable()
    {
        var dt = new DataTable();
        foreach (var pair in Helpers.PairEnumerables(this.Data.ColumnNames, this.Data.Row))
        {
            var columnName = pair.Item1;
            object? value = pair.Item2;

            // DBNull (and, defensively, null) carries no type information.
            var valueType = value is null or DBNull ? typeof(object) : value.GetType();
            dt.Columns.Add(new DataColumn(columnName, valueType));
        }

        var row = dt.NewRow();
        row.ItemArray = this.Data.Row;
        dt.Rows.Add(row);
        return dt;
    }

    /// <summary>Returns the full JSON rendering of the struct.</summary>
    public override string ToString() => ToJSON(out _);

    /// <summary>Returns a JSON rendering cut off once it grows past <paramref name="desiredLength"/>.</summary>
    public string ToStringTruncated(int desiredLength) => ToJSON(out _, desiredLength);

    /// <summary>
    /// Serializes the struct as a JSON object with one property per field.
    /// </summary>
    /// <param name="success">
    /// True when serialization completed; false when an exception occurred, in which case the
    /// returned string is an error description instead of JSON.
    /// </param>
    /// <param name="desiredLength">
    /// When positive, writing stops after the first field that pushes the running length past this
    /// value; the object is left unterminated and "[...]" is appended. Any non-null value also
    /// switches nested values to their display-truncated form.
    /// </param>
    public string ToJSON(out bool success, int? desiredLength = null)
    {
        try
        {
            bool isTruncated = false;
            using var ms = new MemoryStream();
            using (var jsonWriter = new Utf8JsonWriterWithRunningLength(ms))
            {
                jsonWriter.WriteStartObject();

                // Enumerate the names once instead of ElementAt(i) per field.
                int fieldIndex = 0;
                foreach (string columnName in Data.ColumnNames)
                {
                    jsonWriter.WritePropertyName(columnName);

                    object value = Data.Row[fieldIndex];
                    Helpers.WriteValue(jsonWriter, value, desiredLength is not null);
                    fieldIndex++;

                    if (desiredLength > 0 && jsonWriter.ApproximateStringLengthSoFar > desiredLength)
                    {
                        isTruncated = true;
                        break;
                    }
                }

                if (!isTruncated)
                    jsonWriter.WriteEndObject();
            }

            ms.Position = 0;
            using var reader = new StreamReader(ms);
            var json = reader.ReadToEnd();
            if (isTruncated)
            {
                json += "[...]";
            }

            success = true;
            return json;
        }
        catch (Exception ex)
        {
            success = false;
            return $"Error while serializing Struct field: {Environment.NewLine}{Environment.NewLine}{ex}";
        }
    }
}
|
||||
}
|
||||
|
|
@ -98,4 +98,4 @@ namespace ParquetViewer.Engine
|
|||
_writer.Dispose();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
src/ParquetViewer.Tests/Data/BYTEARRAY_VALUE_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/BYTEARRAY_VALUE_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/DECIMALS_OUTOFRANGE_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/DECIMALS_OUTOFRANGE_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/LIST_OF_NESTED_STRUCTS_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/LIST_OF_NESTED_STRUCTS_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/NESTED_MAPS_TEST.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/NESTED_MAPS_TEST.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/NESTED_STRUCTS_AND_LISTS.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/NESTED_STRUCTS_AND_LISTS.parquet
Normal file
Binary file not shown.
BIN
src/ParquetViewer.Tests/Data/TIME_ONLY_TYPE_PYARROW_V22.parquet
Normal file
BIN
src/ParquetViewer.Tests/Data/TIME_ONLY_TYPE_PYARROW_V22.parquet
Normal file
Binary file not shown.
|
|
@ -1,22 +1,74 @@
|
|||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using System.Data;
|
||||
using System.Text.Json;
|
||||
|
||||
[assembly: Parallelize(Scope = ExecutionScope.MethodLevel)]
|
||||
namespace ParquetViewer.Tests
|
||||
{
|
||||
[TestClass]
|
||||
public class EngineTests
|
||||
public class ParquetNETEngineTests : EngineTests
|
||||
{
|
||||
public EngineTests()
|
||||
public ParquetNETEngineTests() : base(
|
||||
useDuckDBEngine: false,
|
||||
canHandleNullComplexTypes: true,
|
||||
treatsTwoTierListAsStruct: true,
|
||||
"/")
|
||||
{
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Runs the shared <see cref="EngineTests"/> suite with the DuckDB engine selected.
/// </summary>
[TestClass]
public class DuckDBEngineTests : EngineTests
{
    public DuckDBEngineTests()
        : base(
            useDuckDBEngine: true,
            canHandleNullComplexTypes: false,
            treatsTwoTierListAsStruct: false,
            schemaPathSeperator: ", ")
    {
    }
}
|
||||
|
||||
public abstract class EngineTests
|
||||
{
|
||||
private bool _useDuckDBEngine;
|
||||
private bool _canHandleNullComplexTypes;
|
||||
private bool _treatsTwoTierListAsStruct;
|
||||
private string _schemaPathSeperator;
|
||||
|
||||
public EngineTests(bool useDuckDBEngine, bool canHandleNullComplexTypes, bool treatsTwoTierListAsStruct, string schemaPathSeperator)
|
||||
{
|
||||
//Set a consistent date format for all tests
|
||||
ParquetEngineSettings.DateDisplayFormat = "yyyy-MM-dd HH:mm:ss";
|
||||
ParquetEngineSettings.DateOnlyDisplayFormat = "yyyy-MM-dd";
|
||||
ParquetEngineSettings.TimeOnlyDisplayFormat = "HH:mm:ss";
|
||||
|
||||
this._useDuckDBEngine = useDuckDBEngine;
|
||||
this._canHandleNullComplexTypes = canHandleNullComplexTypes;
|
||||
this._treatsTwoTierListAsStruct = treatsTwoTierListAsStruct;
|
||||
this._schemaPathSeperator = schemaPathSeperator;
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
/// <summary>
/// Opens <paramref name="path"/> with whichever engine this fixture was configured for:
/// DuckDB when <c>_useDuckDBEngine</c> is set, Parquet.NET otherwise.
/// </summary>
/// <param name="path">File or folder path handed to the engine.</param>
/// <param name="cancellationToken">Token forwarded to the engine's open call.</param>
private async Task<IParquetEngine> OpenFileOrFolderAsync(string path, CancellationToken cancellationToken)
{
    if (!this._useDuckDBEngine)
    {
        return await Engine.ParquetNET.ParquetEngine.OpenFileOrFolderAsync(path, cancellationToken);
    }

    return await Engine.DuckDB.ParquetEngine.OpenFileOrFolderAsync(path, cancellationToken);
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
public async Task DECIMALS_AND_BOOLS_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DECIMALS_AND_BOOLS_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DECIMALS_AND_BOOLS_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(30, parquetEngine.RecordCount);
|
||||
Assert.HasCount(337, parquetEngine.Fields);
|
||||
|
|
@ -29,35 +81,35 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(DBNull.Value, dataTable.Rows[21][334]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task DATETIME_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DATETIME_TEST1.parquet", default);
|
||||
|
||||
Assert.AreEqual(10, parquetEngine.RecordCount);
|
||||
Assert.HasCount(3, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual("36/2015-16", dataTable.Rows[0][0]);
|
||||
Assert.AreEqual(new DateTime(2015, 07, 14, 0, 0, 0), dataTable.Rows[1][2]);
|
||||
Assert.AreEqual(new DateOnly(2015, 07, 14), dataTable.Rows[1][2]);
|
||||
Assert.AreEqual(new DateTime(2015, 07, 19, 18, 30, 0), dataTable.Rows[9][1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task DATETIME_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DATETIME_TEST2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DATETIME_TEST2.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(11, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual((long)1, dataTable.Rows[0][0]);
|
||||
Assert.AreEqual(new DateTime(1985, 12, 31, 0, 0, 0), dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(new DateTime(1, 1, 2, 0, 0, 0), dataTable.Rows[0][2]);
|
||||
Assert.AreEqual(new DateTime(9999, 12, 31, 0, 0, 0), dataTable.Rows[0][3]);
|
||||
Assert.AreEqual(new DateTime(9999, 12, 31, 0, 0, 0), dataTable.Rows[0][4]);
|
||||
Assert.AreEqual(new DateTime(1, 1, 1, 0, 0, 0), dataTable.Rows[0][5]);
|
||||
Assert.AreEqual(new DateOnly(1985, 12, 31), dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(new DateOnly(1, 1, 2), dataTable.Rows[0][2]);
|
||||
Assert.AreEqual(new DateOnly(9999, 12, 31), dataTable.Rows[0][3]);
|
||||
Assert.AreEqual(new DateOnly(9999, 12, 31), dataTable.Rows[0][4]);
|
||||
Assert.AreEqual(new DateOnly(1, 1, 1), dataTable.Rows[0][5]);
|
||||
Assert.AreEqual(new DateTime(1985, 4, 13, 13, 5, 0), dataTable.Rows[0][6]);
|
||||
Assert.AreEqual(new DateTime(1, 1, 2, 0, 0, 0), dataTable.Rows[0][7]);
|
||||
Assert.AreEqual(new DateTime(9999, 12, 31, 23, 59, 59), dataTable.Rows[0][8]);
|
||||
|
|
@ -65,10 +117,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(new DateTime(1, 1, 1, 0, 0, 0), dataTable.Rows[0][10]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task RANDOM_TEST_FILE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/RANDOM_TEST_FILE.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/RANDOM_TEST_FILE.parquet", default);
|
||||
|
||||
Assert.AreEqual(5, parquetEngine.RecordCount);
|
||||
Assert.HasCount(42, parquetEngine.Fields);
|
||||
|
|
@ -83,10 +135,11 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("DLIx12_SHIPCONF_BW15_20220812020138531.DWL", dataTable.Rows[1][41]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(DuckDBEngineTests), "DuckDB automatically appends _1 to the dupe column name")]
|
||||
public async Task SAME_COLUMN_NAME_DIFFERENT_CASING_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/SAME_COLUMN_NAME_DIFFERENT_CASING.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/SAME_COLUMN_NAME_DIFFERENT_CASING.parquet", default);
|
||||
|
||||
Assert.AreEqual(14610, parquetEngine.RecordCount);
|
||||
Assert.HasCount(12, parquetEngine.Fields);
|
||||
|
|
@ -96,17 +149,17 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("Duplicate column 'schema/TransPlan_NORMAL_v2' detected. Column names are case insensitive and must be unique.", ex.Message);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MULTIPLE_SCHEMAS_DETECTED_TEST()
|
||||
{
|
||||
var ex = await Assert.ThrowsAsync<MultipleSchemasFoundException>(() => ParquetEngine.OpenFileOrFolderAsync("Data", default));
|
||||
var ex = await Assert.ThrowsAsync<MultipleSchemasFoundException>(() => OpenFileOrFolderAsync("Data", default));
|
||||
Assert.AreEqual("Multiple schemas found in directory.", ex.Message);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task PARTITIONED_PARQUET_FILE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/PARTITIONED_PARQUET_FILE_TEST", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/PARTITIONED_PARQUET_FILE_TEST", default);
|
||||
|
||||
Assert.AreEqual(2000, parquetEngine.RecordCount);
|
||||
Assert.HasCount(9, parquetEngine.Fields);
|
||||
|
|
@ -128,93 +181,94 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("B000CTP5G2P2", dataTable.Rows[0][8]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task COLUMN_ENDING_IN_PERIOD_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/COLUMN_ENDING_IN_PERIOD_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/COLUMN_ENDING_IN_PERIOD_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(11, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual(202252, dataTable.Rows[0][0]);
|
||||
Assert.IsFalse(dataTable.Rows[0]["Output as FP"] as bool?);
|
||||
Assert.IsFalse((bool)dataTable.Rows[0]["Output as FP"]);
|
||||
Assert.AreEqual((byte)0, dataTable.Rows[0]["Preorder FP equi."]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(DuckDBEngineTests), "DuckDB can't handle lists with null in them?")]
|
||||
public async Task LIST_TYPE_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);
|
||||
|
||||
Assert.AreEqual(3, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][0]);
|
||||
Assert.AreEqual("[1,2,3]", dataTable.Rows[0][0].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[\"abc\",\"efg\",\"hij\"]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[1][0]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(@"[""abc"",""efg"",""hij""]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[1][0]);
|
||||
Assert.AreEqual("[null,1]", dataTable.Rows[1][0].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[2][1]);
|
||||
Assert.AreEqual(4, ((ListValue)dataTable.Rows[2][1]).Length);
|
||||
Assert.AreEqual("efg", ((ListValue)dataTable.Rows[2][1]).Data![0]);
|
||||
Assert.AreEqual(DBNull.Value, ((ListValue)dataTable.Rows[2][1]).Data![1]);
|
||||
Assert.AreEqual("xyz", ((ListValue)dataTable.Rows[2][1]).Data![3]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[2][1]);
|
||||
Assert.HasCount(4, (IListValue)dataTable.Rows[2][1]);
|
||||
Assert.AreEqual("efg", ((IListValue)dataTable.Rows[2][1]).Data![0]);
|
||||
Assert.AreEqual(DBNull.Value, ((IListValue)dataTable.Rows[2][1]).Data![1]);
|
||||
Assert.AreEqual("xyz", ((IListValue)dataTable.Rows[2][1]).Data![3]);
|
||||
|
||||
//Also try reading with a record offset
|
||||
dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 1, 1, default))(false);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][0]);
|
||||
Assert.AreEqual("[null,1]", dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_TYPE_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_TYPE_TEST2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_TYPE_TEST2.parquet", default);
|
||||
|
||||
Assert.AreEqual(8, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 2, 4, default))(false);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
|
||||
Assert.AreEqual("[1,2]", dataTable.Rows[0][1].ToString());
|
||||
Assert.AreEqual(1, ((ListValue)dataTable.Rows[0][1]).Data[0]);
|
||||
Assert.AreEqual(2, ((ListValue)dataTable.Rows[0][1]).Data[1]);
|
||||
Assert.AreEqual(1, ((IListValue)dataTable.Rows[0][1]).Data[0]);
|
||||
Assert.AreEqual(2, ((IListValue)dataTable.Rows[0][1]).Data[1]);
|
||||
|
||||
Assert.AreEqual(string.Empty, dataTable.Rows[1][1].ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[1][1]);
|
||||
|
||||
Assert.AreEqual("[]", dataTable.Rows[2][1].ToString());
|
||||
Assert.IsEmpty(((ListValue)dataTable.Rows[2][1]).Data.Cast<dynamic>());
|
||||
Assert.IsEmpty(((IListValue)dataTable.Rows[2][1]).Data.Cast<dynamic>());
|
||||
|
||||
Assert.AreEqual("[3,4]", dataTable.Rows[3][1].ToString());
|
||||
Assert.AreEqual(3, ((ListValue)dataTable.Rows[3][1]).Data[0]);
|
||||
Assert.AreEqual(4, ((ListValue)dataTable.Rows[3][1]).Data[1]);
|
||||
Assert.AreEqual(3, ((IListValue)dataTable.Rows[3][1]).Data[0]);
|
||||
Assert.AreEqual(4, ((IListValue)dataTable.Rows[3][1]).Data[1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MAP_TYPE_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MAP_TYPE_TEST1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/MAP_TYPE_TEST1.parquet", default);
|
||||
|
||||
Assert.AreEqual(2, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 2, default))(false);
|
||||
|
||||
Assert.IsInstanceOfType<MapValue>(dataTable.Rows[0][0]);
|
||||
var row = (MapValue)dataTable.Rows[0][0];
|
||||
Assert.IsInstanceOfType<IMapValue>(dataTable.Rows[0][0]);
|
||||
var row = (IMapValue)dataTable.Rows[0][0];
|
||||
Assert.AreEqual("id", row.FirstOrDefault().Key);
|
||||
Assert.AreEqual("something", row.FirstOrDefault().Value);
|
||||
Assert.AreEqual("value2", row.Skip(1).FirstOrDefault().Key);
|
||||
Assert.AreEqual("else", row.Skip(1).FirstOrDefault().Value);
|
||||
Assert.AreEqual("[(id,something),(value2,else)]", row.ToString());
|
||||
|
||||
Assert.IsInstanceOfType<MapValue>(dataTable.Rows[1][0]);
|
||||
row = (MapValue)dataTable.Rows[1][0];
|
||||
Assert.IsInstanceOfType<IMapValue>(dataTable.Rows[1][0]);
|
||||
row = (IMapValue)dataTable.Rows[1][0];
|
||||
Assert.AreEqual("id", row.FirstOrDefault().Key);
|
||||
Assert.AreEqual("something2", row.FirstOrDefault().Value);
|
||||
Assert.AreEqual("value", row.Skip(1).FirstOrDefault().Key);
|
||||
|
|
@ -222,90 +276,90 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("[(id,something2),(value,else2)]", row.ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MAP_TYPE_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MAP_TYPE_TEST2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/MAP_TYPE_TEST2.parquet", default);
|
||||
|
||||
Assert.AreEqual(8, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 2, 4, default))(false);
|
||||
Assert.IsInstanceOfType<MapValue>(dataTable.Rows[0][1]);
|
||||
Assert.IsInstanceOfType<IMapValue>(dataTable.Rows[0][1]);
|
||||
|
||||
Assert.AreEqual("[(1,1),(2,2)]", dataTable.Rows[0][1].ToString());
|
||||
Assert.AreEqual(1, ((MapValue)dataTable.Rows[0][1]).Keys[0]);
|
||||
Assert.AreEqual(1, ((MapValue)dataTable.Rows[0][1]).Values[0]);
|
||||
Assert.AreEqual(2, ((MapValue)dataTable.Rows[0][1]).Keys[1]);
|
||||
Assert.AreEqual(2, ((MapValue)dataTable.Rows[0][1]).Values[1]);
|
||||
Assert.AreEqual(1, ((IMapValue)dataTable.Rows[0][1]).Keys[0]);
|
||||
Assert.AreEqual(1, ((IMapValue)dataTable.Rows[0][1]).Values[0]);
|
||||
Assert.AreEqual(2, ((IMapValue)dataTable.Rows[0][1]).Keys[1]);
|
||||
Assert.AreEqual(2, ((IMapValue)dataTable.Rows[0][1]).Values[1]);
|
||||
|
||||
Assert.AreEqual(string.Empty, dataTable.Rows[1][1].ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[1][1]);
|
||||
|
||||
Assert.AreEqual("[]", dataTable.Rows[2][1].ToString());
|
||||
Assert.IsEmpty(((MapValue)dataTable.Rows[2][1]).Keys.Cast<dynamic>());
|
||||
Assert.IsEmpty(((MapValue)dataTable.Rows[2][1]).Values.Cast<dynamic>());
|
||||
Assert.IsEmpty(((IMapValue)dataTable.Rows[2][1]).Keys.Cast<dynamic>());
|
||||
Assert.IsEmpty(((IMapValue)dataTable.Rows[2][1]).Values.Cast<dynamic>());
|
||||
|
||||
Assert.AreEqual("[(3,3),(4,4)]", dataTable.Rows[3][1].ToString());
|
||||
Assert.AreEqual(3, ((MapValue)dataTable.Rows[3][1]).Keys[0]);
|
||||
Assert.AreEqual(3, ((MapValue)dataTable.Rows[3][1]).Values[0]);
|
||||
Assert.AreEqual(4, ((MapValue)dataTable.Rows[3][1]).Keys[1]);
|
||||
Assert.AreEqual(4, ((MapValue)dataTable.Rows[3][1]).Values[1]);
|
||||
Assert.AreEqual(3, ((IMapValue)dataTable.Rows[3][1]).Keys[0]);
|
||||
Assert.AreEqual(3, ((IMapValue)dataTable.Rows[3][1]).Values[0]);
|
||||
Assert.AreEqual(4, ((IMapValue)dataTable.Rows[3][1]).Keys[1]);
|
||||
Assert.AreEqual(4, ((IMapValue)dataTable.Rows[3][1]).Values[1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task STRUCT_TYPE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/STRUCT_TYPE_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/STRUCT_TYPE_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(10, parquetEngine.RecordCount);
|
||||
Assert.HasCount(6, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[2][0]);
|
||||
Assert.AreEqual("{\"appId\":\"e4a20b59-dd0e-4c50-b074-e8ae4786df30\",\"version\":0,\"lastUpdated\":1564524299648}", ((StructValue)dataTable.Rows[2][0]).ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][1]);
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[5][1]);
|
||||
Assert.AreEqual("{\"path\":\"part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet\",\"partitionValues\":[],\"size\":404,\"modificationTime\":1564524299000,\"dataChange\":false,\"stats\":null,\"tags\":null}", ((StructValue)dataTable.Rows[5][1]).ToString());
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[3][2]);
|
||||
Assert.AreEqual("{\"path\":\"part-00000-512e1537-8aaa-4193-b8b4-bef3de0de409-c000.snappy.parquet\",\"deletionTimestamp\":1564524298213,\"dataChange\":false}", ((StructValue)dataTable.Rows[3][2]).ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][3]);
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[1][3]);
|
||||
Assert.AreEqual("{\"id\":\"22ef18ba-191c-4c36-a606-3dad5cdf3830\",\"name\":null,\"description\":null,\"format\":{\"provider\":\"parquet\",\"options\":[]},\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"value\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\"partitionColumns\":null,\"configuration\":[],\"createdTime\":1564524294376}", ((StructValue)dataTable.Rows[1][3]).ToString());
|
||||
Assert.IsInstanceOfType<StructValue>(dataTable.Rows[0][4]);
|
||||
Assert.AreEqual("{\"minReaderVersion\":1,\"minWriterVersion\":2}", ((StructValue)dataTable.Rows[0][4]).ToString());
|
||||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][5]);
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[2][0]);
|
||||
Assert.AreEqual("{\"appId\":\"e4a20b59-dd0e-4c50-b074-e8ae4786df30\",\"version\":0,\"lastUpdated\":1564524299648}", dataTable.Rows[2][0].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[5][1]);
|
||||
Assert.AreEqual("{\"path\":\"part-00000-cb6b150b-30b8-4662-ad28-ff32ddab96d2-c000.snappy.parquet\",\"partitionValues\":[],\"size\":404,\"modificationTime\":1564524299000,\"dataChange\":false,\"stats\":null,\"tags\":null}", dataTable.Rows[5][1].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[6][2]);
|
||||
Assert.AreEqual("{\"path\":\"part-00001-185eca06-e017-4dea-ae49-fc48b973e37e-c000.snappy.parquet\",\"deletionTimestamp\":1564524298214,\"dataChange\":false}", dataTable.Rows[6][2].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[1][3]);
|
||||
if (_canHandleNullComplexTypes)
|
||||
Assert.AreEqual("{\"id\":\"22ef18ba-191c-4c36-a606-3dad5cdf3830\",\"name\":null,\"description\":null,\"format\":{\"provider\":\"parquet\",\"options\":[]},\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"value\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\"partitionColumns\":null,\"configuration\":[],\"createdTime\":1564524294376}", dataTable.Rows[1][3].ToString());
|
||||
else
|
||||
Assert.AreEqual("{\"id\":\"22ef18ba-191c-4c36-a606-3dad5cdf3830\",\"name\":null,\"description\":null,\"format\":{\"provider\":\"parquet\",\"options\":[]},\"schemaString\":\"{\\\"type\\\":\\\"struct\\\",\\\"fields\\\":[{\\\"name\\\":\\\"value\\\",\\\"type\\\":\\\"integer\\\",\\\"nullable\\\":true,\\\"metadata\\\":{}}]}\",\"partitionColumns\":[],\"configuration\":[],\"createdTime\":1564524294376}", dataTable.Rows[1][3].ToString());
|
||||
Assert.IsInstanceOfType<IStructValue>(dataTable.Rows[0][4]);
|
||||
Assert.AreEqual("{\"minReaderVersion\":1,\"minWriterVersion\":2}", dataTable.Rows[0][4].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task NULLABLE_GUID_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/NULLABLE_GUID_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/NULLABLE_GUID_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(33, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.IsFalse(dataTable.Rows[0][22] as bool?);
|
||||
Assert.IsFalse((bool)dataTable.Rows[0][22]);
|
||||
Assert.AreEqual(new Guid("fdcbf90c-20d3-d745-b29f-9c2de1baa979"), dataTable.Rows[0][1]);
|
||||
Assert.AreEqual(new DateTime(2019, 1, 1), dataTable.Rows[0][4]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task MALFORMED_DATETIME_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/MALFORMED_DATETIME_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/MALFORMED_DATETIME_TEST.parquet", default);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual(typeof(DateTime), dataTable.Rows[0]["ds"]?.GetType());
|
||||
Assert.IsInstanceOfType<DateTime>(dataTable.Rows[0]["ds"]);
|
||||
Assert.AreEqual(new DateTime(2017, 1, 1), dataTable.Rows[0]["ds"]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task COLUMN_NAME_WITH_FORWARD_SLASH_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/COLUMN_NAME_WITH_FORWARD_SLASH.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/COLUMN_NAME_WITH_FORWARD_SLASH.parquet", default);
|
||||
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(320, parquetEngine.Fields);
|
||||
|
|
@ -314,10 +368,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual((byte)0, dataTable.Rows[0]["FLC K/L"]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task ORACLE_MALFORMED_INT64_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/ORACLE_MALFORMED_INT64_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/ORACLE_MALFORMED_INT64_TEST.parquet", default);
|
||||
|
||||
Assert.AreEqual(126, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
|
@ -327,43 +381,41 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual((long)1, dataTable.Rows[0][1]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_STRUCTS_TEST1()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS1.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS1.parquet", default);
|
||||
Assert.AreEqual(2, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 1, default))(false);
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 2, default))(false);
|
||||
|
||||
Assert.AreEqual("Product1", dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("Product2", dataTable.Rows[1][0]);
|
||||
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[{\"DateTime\":\"2024-04-15 22:00:00\",\"Quantity\":10},{\"DateTime\":\"2024-04-16 22:00:00\",\"Quantity\":20}]", dataTable.Rows[0][1].ToString());
|
||||
|
||||
dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 1, 1, default))(false);
|
||||
|
||||
Assert.AreEqual("Product2", dataTable.Rows[0][0]);
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[{\"DateTime\":\"2024-04-15 22:00:00\",\"Quantity\":30},{\"DateTime\":\"2024-04-16 22:00:00\",\"Quantity\":40}]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[1][1]);
|
||||
Assert.AreEqual("[{\"DateTime\":\"2024-04-15 22:00:00\",\"Quantity\":30},{\"DateTime\":\"2024-04-16 22:00:00\",\"Quantity\":40}]", dataTable.Rows[1][1].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_STRUCTS_TEST2()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS2.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_STRUCTS2.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(29, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][28]);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][28]);
|
||||
Assert.AreEqual("[{\"purposeId\":\"HF85PyyGFprJXJvh5Pk9tg\",\"status\":\"Granted\",\"externalId\":\"General\",\"date\":\"2025-06-05 14:30:33\"}]", dataTable.Rows[0][28].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task EMPTY_LIST_OF_STRUCTS_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/EMPTY_LIST_OF_STRUCTS.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/EMPTY_LIST_OF_STRUCTS.parquet", default);
|
||||
Assert.AreEqual(2, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -372,18 +424,16 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("Product1", dataTable.Rows[0][0]);
|
||||
Assert.AreEqual("Product2", dataTable.Rows[1][0]);
|
||||
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[0][1]);
|
||||
Assert.IsEmpty(((ListValue)dataTable.Rows[0][1]).Data);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[]", dataTable.Rows[0][1].ToString());
|
||||
Assert.IsInstanceOfType<ListValue>(dataTable.Rows[1][1]);
|
||||
Assert.IsEmpty(((ListValue)dataTable.Rows[1][1]).Data);
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[1][1]);
|
||||
Assert.AreEqual("[]", dataTable.Rows[1][1].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task PARQUET_MR_BREAKING_CHANGE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/PARQUET-MR_1.15.0.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/PARQUET-MR_1.15.0.parquet", default);
|
||||
Assert.AreEqual(5, parquetEngine.RecordCount);
|
||||
Assert.HasCount(7, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -395,14 +445,15 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("John Doe", dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("David Lee", dataTable.Rows[4][1]);
|
||||
|
||||
Assert.IsTrue(dataTable.Rows[0][4] as bool?);
|
||||
Assert.IsTrue(dataTable.Rows[4][4] as bool?);
|
||||
Assert.IsTrue((bool)dataTable.Rows[0][4]);
|
||||
Assert.IsTrue((bool)dataTable.Rows[4][4]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(DuckDBEngineTests), "DuckDB can't open this file")]
|
||||
public async Task DECIMALS_WITH_NO_SCALE_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/DECIMALS_WITH_NO_SCALE_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DECIMALS_WITH_NO_SCALE_TEST.parquet", default);
|
||||
Assert.AreEqual(10589, parquetEngine.RecordCount);
|
||||
Assert.HasCount(8, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -417,10 +468,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(0m, dataTable.Rows[100][7]);
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_LIST_OF_INT()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_INT.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_INT.parquet", default);
|
||||
Assert.AreEqual(3, parquetEngine.RecordCount);
|
||||
Assert.HasCount(1, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -436,10 +487,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("[[1],[],[3],null,[5]]", dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_LIST_OF_LIST_OF_STRING()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_LIST_OF_STRING.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_LIST_OF_LIST_OF_STRING.parquet", default);
|
||||
Assert.AreEqual(3, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -455,10 +506,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("[[[\"a\",\"b\"],[\"c\",\"d\"],[\"e\"]],[null,[\"f\"]]]", dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
[SkippableTestMethod]
|
||||
public async Task LIST_OF_STRUCT_OF_LIST_OF_STRUCT()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/LIST_OF_STRUCT_OF_LIST_OF_STRUCT.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_STRUCT_OF_LIST_OF_STRUCT.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(1, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -468,10 +519,10 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(expectedJson, dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[TestMethod]
|
||||
public async Task TWO_TIER_TEPEATED_LIST_FIELDS_TEST()
|
||||
[SkippableTestMethod]
|
||||
public async Task TWO_TIER_REPEATED_LIST_FIELDS_TEST()
|
||||
{
|
||||
using var parquetEngine = await ParquetEngine.OpenFileOrFolderAsync("Data/TWO_TIER_TEPEATED_LIST_FIELDS_TEST.parquet", default);
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/TWO_TIER_TEPEATED_LIST_FIELDS_TEST.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(8, parquetEngine.Fields);
|
||||
|
||||
|
|
@ -482,9 +533,200 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual(DBNull.Value, dataTable.Rows[0][2]);
|
||||
Assert.AreEqual("hello", dataTable.Rows[0][3]);
|
||||
Assert.AreEqual("[10,20]", dataTable.Rows[0][4].ToString());
|
||||
Assert.AreEqual("{\"nested\":\"nested!\"}", dataTable.Rows[0][5].ToString());
|
||||
if (_treatsTwoTierListAsStruct)
|
||||
Assert.AreEqual("{\"nested\":\"nested!\"}", dataTable.Rows[0][5].ToString());
|
||||
else
|
||||
Assert.AreEqual(@"[""nested!""]", dataTable.Rows[0][5].ToString());
|
||||
|
||||
Assert.AreEqual("096d06d7-e00b-4f70-ad5c-ca4da9a9630a", dataTable.Rows[0][6]);
|
||||
Assert.AreEqual("[\"element1\",\"element2\"]", dataTable.Rows[0][7].ToString());
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
public async Task CUSTOM_METADATA_TEST()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_TYPE_TEST1.parquet", default);
|
||||
|
||||
Assert.Contains("pandas", parquetEngine.CustomMetadata.Keys);
|
||||
const string expectedPandas = "{\"index_columns\":[{\"kind\":\"range\",\"name\":null,\"start\":0,\"stop\":3,\"step\":1}],\"column_indexes\":[{\"name\":null,\"field_name\":null,\"pandas_type\":\"unicode\",\"numpy_type\":\"object\",\"metadata\":{\"encoding\":\"UTF-8\"}}],\"columns\":[{\"name\":\"int64_list\",\"field_name\":\"int64_list\",\"pandas_type\":\"list[int64]\",\"numpy_type\":\"object\",\"metadata\":null},{\"name\":\"utf8_list\",\"field_name\":\"utf8_list\",\"pandas_type\":\"list[unicode]\",\"numpy_type\":\"object\",\"metadata\":null}],\"creator\":{\"library\":\"pyarrow\",\"version\":\"0.15.1\"},\"pandas_version\":\"0.25.3\"}";
|
||||
Assert.AreEqual(TryFormatJSON(expectedPandas), TryFormatJSON(parquetEngine.CustomMetadata["pandas"]));
|
||||
|
||||
Assert.Contains("ARROW:schema", parquetEngine.CustomMetadata.Keys);
|
||||
const string expectedArrow = "/////4ADAAAQAAAAAAAKAA4ABgAFAAgACgAAAAABAwAQAAAAAAAKAAwAAAAEAAgACgAAAHQCAAAEAAAAAQAAAAwAAAAIAAwABAAIAAgAAABMAgAABAAAADwCAAB7ImluZGV4X2NvbHVtbnMiOiBbeyJraW5kIjogInJhbmdlIiwgIm5hbWUiOiBudWxsLCAic3RhcnQiOiAwLCAic3RvcCI6IDMsICJzdGVwIjogMX1dLCAiY29sdW1uX2luZGV4ZXMiOiBbeyJuYW1lIjogbnVsbCwgImZpZWxkX25hbWUiOiBudWxsLCAicGFuZGFzX3R5cGUiOiAidW5pY29kZSIsICJudW1weV90eXBlIjogIm9iamVjdCIsICJtZXRhZGF0YSI6IHsiZW5jb2RpbmciOiAiVVRGLTgifX1dLCAiY29sdW1ucyI6IFt7Im5hbWUiOiAiaW50NjRfbGlzdCIsICJmaWVsZF9uYW1lIjogImludDY0X2xpc3QiLCAicGFuZGFzX3R5cGUiOiAibGlzdFtpbnQ2NF0iLCAibnVtcHlfdHlwZSI6ICJvYmplY3QiLCAibWV0YWRhdGEiOiBudWxsfSwgeyJuYW1lIjogInV0ZjhfbGlzdCIsICJmaWVsZF9uYW1lIjogInV0ZjhfbGlzdCIsICJwYW5kYXNfdHlwZSI6ICJsaXN0W3VuaWNvZGVdIiwgIm51bXB5X3R5cGUiOiAib2JqZWN0IiwgIm1ldGFkYXRhIjogbnVsbH1dLCAiY3JlYXRvciI6IHsibGlicmFyeSI6ICJweWFycm93IiwgInZlcnNpb24iOiAiMC4xNS4xIn0sICJwYW5kYXNfdmVyc2lvbiI6ICIwLjI1LjMifQAAAAAGAAAAcGFuZGFzAAACAAAAYAAAAAQAAACE////AAABDEAAAAAQAAAABAAAAAEAAAAIAAAAqP///6T///8AAAEFFAAAAAwAAAAEAAAAAAAAAMT///8EAAAAaXRlbQAAAAAJAAAAdXRmOF9saXN0AAAA3P///wAAAQxkAAAAFAAAAAQAAAABAAAAHAAAAAQABAAEAAAAEAAUAAgABgAHAAwAAAAQABAAAAAAAAECJAAAABQAAAAEAAAAAAAAAAgADAAIAAcACAAAAAAAAAFAAAAABAAAAGl0ZW0AAAAACgAAAGludDY0X2xpc3QAAA==";
|
||||
Assert.AreEqual(expectedArrow, parquetEngine.CustomMetadata["ARROW:schema"]);
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
public async Task DECIMALS_OUTOFRANGE_TEST()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/DECIMALS_OUTOFRANGE_TEST.parquet", default);
|
||||
Assert.AreEqual(12, parquetEngine.RecordCount);
|
||||
Assert.HasCount(51, parquetEngine.Fields);
|
||||
|
||||
await Assert.ThrowsAsync<DecimalOverflowException>(() =>
|
||||
parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default));
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(ParquetNETEngineTests), "Our implementation can't open this file")]
|
||||
public async Task LIST_OF_NESTED_STRUCTS_TEST()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/LIST_OF_NESTED_STRUCTS_TEST.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(1, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
|
||||
Assert.AreEqual("[{\"B\":{\"id\":1}},{\"B\":{\"id\":null}},{\"B\":null}]", dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(ParquetNETEngineTests), "Nested Maps not supported")]
|
||||
public async Task NESTED_MAPS_TEST()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/NESTED_MAPS_TEST.parquet", default);
|
||||
Assert.AreEqual(6, parquetEngine.RecordCount);
|
||||
Assert.HasCount(3, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 3, default))(false);
|
||||
|
||||
Assert.AreEqual("[(a,[(1,True),(2,False)])]", dataTable.Rows[0][0].ToString());
|
||||
Assert.AreEqual("[(b,[(1,True)])]", dataTable.Rows[1][0].ToString());
|
||||
Assert.AreEqual("[(c,)]", dataTable.Rows[2][0].ToString());
|
||||
|
||||
dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 3, 3, default))(false);
|
||||
|
||||
Assert.AreEqual("[(d,[])]", dataTable.Rows[0][0].ToString());
|
||||
Assert.AreEqual("[(e,[(1,True)])]", dataTable.Rows[1][0].ToString());
|
||||
Assert.AreEqual("[(f,[(3,True),(4,False),(5,True)])]", dataTable.Rows[2][0].ToString());
|
||||
}
|
||||
|
||||
private static string TryFormatJSON(string possibleJSON)
|
||||
{
|
||||
try
|
||||
{
|
||||
var jsonElement = JsonSerializer.Deserialize<JsonElement>(possibleJSON);
|
||||
return JsonSerializer.Serialize(jsonElement, new JsonSerializerOptions { WriteIndented = true });
|
||||
}
|
||||
catch (Exception)
|
||||
{
|
||||
//malformed json detected
|
||||
return possibleJSON;
|
||||
}
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
public async Task BYTEARRAY_VALUE_TEST()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/BYTEARRAY_VALUE_TEST.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.RecordCount);
|
||||
Assert.HasCount(1, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.IsInstanceOfType<IByteArrayValue>(dataTable.Rows[0][0]);
|
||||
|
||||
const string expected = "67-33-73-68-61-72-70-5F-73-74-6C-20-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00-00";
|
||||
Assert.AreEqual(expected, dataTable.Rows[0][0].ToString());
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
[SkipWhen(typeof(ParquetNETEngineTests), "List field is causing issues")]
|
||||
public async Task NESTED_STRUCTS_AND_LISTS()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/NESTED_STRUCTS_AND_LISTS.parquet", default);
|
||||
Assert.AreEqual(552, parquetEngine.RecordCount);
|
||||
Assert.HasCount(20, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, 1, default))(false);
|
||||
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][1]);
|
||||
Assert.AreEqual("[{\"explicit\":null,\"ref_reco\":3,\"text\":\"it is not the case that routine child vaccinations should be mandatory.\"}]", dataTable.Rows[0][1].ToString());
|
||||
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][11]);
|
||||
Assert.AreEqual("[[\"p\",\"routine child vaccinations, or their side effects, are dangerous\"],[\"q\",\"routine child vaccinations should be mandatory\"]]", dataTable.Rows[0][11].ToString());
|
||||
|
||||
Assert.IsInstanceOfType<IListValue>(dataTable.Rows[0][19]);
|
||||
Assert.AreEqual("[[\"id\",\"argkp_1feffc6a-01eb-4f64-a42f-db898627fbc8\"]]", dataTable.Rows[0][19].ToString());
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
public async Task METADATA_TEST1()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/NESTED_STRUCTS_AND_LISTS.parquet", default);
|
||||
Assert.AreEqual(1, parquetEngine.Metadata.ParquetVersion);
|
||||
Assert.AreEqual(552, parquetEngine.Metadata.RowCount);
|
||||
Assert.AreEqual(1, parquetEngine.Metadata.RowGroupCount);
|
||||
Assert.AreEqual("parquet-cpp-arrow version 4.0.1", parquetEngine.Metadata.CreatedBy);
|
||||
Assert.HasCount(1, parquetEngine.Metadata.RowGroups);
|
||||
var rowGroup = parquetEngine.Metadata.RowGroups.First();
|
||||
Assert.AreEqual(33, rowGroup.ColumnCount);
|
||||
Assert.AreEqual(552, rowGroup.RowCount);
|
||||
Assert.AreEqual(2704, rowGroup.FileOffset);
|
||||
Assert.AreEqual(0, rowGroup.Ordinal);
|
||||
Assert.AreEqual(134465, rowGroup.TotalByteSize);
|
||||
Assert.AreEqual(61314, rowGroup.TotalCompressedSize);
|
||||
|
||||
Assert.IsNotNull(rowGroup.Columns);
|
||||
Assert.HasCount(33, rowGroup.Columns);
|
||||
|
||||
var firstColumn = rowGroup.Columns.First();
|
||||
Assert.IsNull(firstColumn.BloomFilterLength);
|
||||
Assert.IsNull(firstColumn.BloomFilterOffset);
|
||||
Assert.AreEqual(0, firstColumn.ColumnId);
|
||||
Assert.AreEqual(1801, firstColumn.DataPageOffset);
|
||||
Assert.AreEqual(4, firstColumn.DictionaryPageOffset);
|
||||
Assert.IsNull(firstColumn.IndexPageOffset);
|
||||
Assert.AreEqual(552, firstColumn.NumValues);
|
||||
Assert.AreEqual("argdown_reconstruction", firstColumn.PathInSchema);
|
||||
Assert.AreEqual(2700, firstColumn.TotalCompressedSize);
|
||||
Assert.AreEqual(10114, firstColumn.TotalUncompressedSize);
|
||||
Assert.AreEqual("BYTE_ARRAY", firstColumn.Type);
|
||||
|
||||
Assert.IsNotNull(firstColumn.Statistics);
|
||||
Assert.IsNull(firstColumn.Statistics.Min);
|
||||
Assert.IsNull(firstColumn.Statistics.Max);
|
||||
Assert.IsNull(firstColumn.Statistics.DistinctCount);
|
||||
Assert.AreEqual(0, firstColumn.Statistics.NullCount);
|
||||
Assert.AreEqual("(1) child vaccination saves lives. (2) if child vaccination saves lives then routine child vaccinations should be mandatory. -- with modus ponens from (1) (2) -- (3) routine child vaccinations should be mandatory.", firstColumn.Statistics.MinValue);
|
||||
Assert.AreEqual("(1) the us offers great opportunities for individuals. (2) if the us offers great opportunities for individuals then the usa is a good country to live in. -- with modus ponens from (1) (2) -- (3) the usa is a good country to live in.", firstColumn.Statistics.MaxValue);
|
||||
Assert.IsNull(firstColumn.Statistics.IsMinValueExact);
|
||||
Assert.IsNull(firstColumn.Statistics.IsMinValueExact);
|
||||
|
||||
var lastColumn = rowGroup.Columns.Last();
|
||||
Assert.IsNull(lastColumn.BloomFilterLength);
|
||||
Assert.IsNull(lastColumn.BloomFilterOffset);
|
||||
Assert.AreEqual(32, lastColumn.ColumnId);
|
||||
Assert.AreEqual(63771, lastColumn.DataPageOffset);
|
||||
Assert.AreEqual(43433, lastColumn.DictionaryPageOffset);
|
||||
Assert.IsNull(lastColumn.IndexPageOffset);
|
||||
Assert.AreEqual(1104, lastColumn.NumValues);
|
||||
Assert.AreEqual($"metadata{_schemaPathSeperator}list{_schemaPathSeperator}item{_schemaPathSeperator}list{_schemaPathSeperator}item", lastColumn.PathInSchema);
|
||||
Assert.AreEqual(21830, lastColumn.TotalCompressedSize);
|
||||
Assert.AreEqual(27163, lastColumn.TotalUncompressedSize);
|
||||
Assert.AreEqual("BYTE_ARRAY", lastColumn.Type);
|
||||
|
||||
Assert.IsNotNull(lastColumn.Statistics);
|
||||
Assert.IsNull(lastColumn.Statistics.Min);
|
||||
Assert.IsNull(lastColumn.Statistics.Max);
|
||||
Assert.IsNull(lastColumn.Statistics.DistinctCount);
|
||||
Assert.AreEqual(0, lastColumn.Statistics.NullCount);
|
||||
Assert.AreEqual("argkp_007a45bc-7a3b-4030-8178-33d7c5fa5cb8", lastColumn.Statistics.MinValue);
|
||||
Assert.AreEqual("id", lastColumn.Statistics.MaxValue);
|
||||
Assert.IsNull(lastColumn.Statistics.IsMinValueExact);
|
||||
Assert.IsNull(lastColumn.Statistics.IsMinValueExact);
|
||||
}
|
||||
|
||||
[SkippableTestMethod]
|
||||
public async Task DATETTIME_ONLY_TYPE_PYARROW_V22()
|
||||
{
|
||||
using var parquetEngine = await OpenFileOrFolderAsync("Data/TIME_ONLY_TYPE_PYARROW_V22.parquet", default);
|
||||
|
||||
Assert.AreEqual(4626, parquetEngine.RecordCount);
|
||||
Assert.HasCount(2, parquetEngine.Fields);
|
||||
|
||||
var dataTable = (await parquetEngine.ReadRowsAsync(parquetEngine.Fields, 0, int.MaxValue, default))(false);
|
||||
Assert.AreEqual(new DateOnly(2024, 1, 1), dataTable.Rows[0][0]);
|
||||
Assert.AreEqual(new TimeOnly(215720000000), dataTable.Rows[0][1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -288,7 +288,7 @@ namespace ParquetViewer.Tests
|
|||
[TestMethod]
|
||||
public void ByteArrayValue_IsCorrectlyTruncated()
|
||||
{
|
||||
var byteArrayValue = new ByteArrayValue("test", [0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10]);
|
||||
var byteArrayValue = new Engine.Types.ByteArrayValue([0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x10]);
|
||||
Assert.AreEqual("01[...]10", byteArrayValue.ToStringTruncated(1));
|
||||
Assert.AreEqual("01[...]10", byteArrayValue.ToStringTruncated(2));
|
||||
Assert.AreEqual("01-02[...]09-10", byteArrayValue.ToStringTruncated(11));
|
||||
|
|
@ -297,4 +297,4 @@ namespace ParquetViewer.Tests
|
|||
Assert.AreEqual("01-02-03-04-05-06-07-08-09-10", byteArrayValue.ToString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<TargetFramework>net8.0-windows</TargetFramework>
|
||||
<TargetFramework>net10.0-windows</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
<IsPackable>false</IsPackable>
|
||||
|
|
@ -28,6 +28,8 @@
|
|||
<PackageReference Include="RichardSzalay.MockHttp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\ParquetViewer.Engine.DuckDB\ParquetViewer.Engine.DuckDB.csproj" />
|
||||
<ProjectReference Include="..\ParquetViewer.Engine.ParquetNET\ParquetViewer.Engine.ParquetNET.csproj" />
|
||||
<ProjectReference Include="..\ParquetViewer.Engine\ParquetViewer.Engine.csproj" />
|
||||
<ProjectReference Include="..\ParquetViewer\ParquetViewer.csproj" />
|
||||
</ItemGroup>
|
||||
|
|
|
|||
15
src/ParquetViewer.Tests/SkipWhenAttribute.cs
Normal file
15
src/ParquetViewer.Tests/SkipWhenAttribute.cs
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
namespace ParquetViewer.Tests
{
    /// <summary>
    /// Marks a test method as skipped when it is executed as part of a specific test class.
    /// May be applied several times to the same method, once per class to skip.
    /// </summary>
    [AttributeUsage(AttributeTargets.Method, AllowMultiple = true)]
    public class SkipWhenAttribute : Attribute
    {
        /// <summary>The test class for which the decorated method should be skipped.</summary>
        public Type TestClassToSkip { get; }

        /// <summary>Optional human-readable explanation of why the test is skipped.</summary>
        public string? Reason { get; set; }

        /// <summary>
        /// Creates the attribute.
        /// </summary>
        /// <param name="testClassToSkip">The test class for which the method is skipped. Must not be null.</param>
        /// <param name="reason">Optional explanation of the skip; defaults to <c>null</c>.</param>
        /// <exception cref="ArgumentNullException"><paramref name="testClassToSkip"/> is null.</exception>
        public SkipWhenAttribute(Type testClassToSkip, string? reason = null)
        {
            // Fail fast with a clear message instead of a NullReferenceException
            // later, when a consumer dereferences TestClassToSkip.
            ArgumentNullException.ThrowIfNull(testClassToSkip);

            TestClassToSkip = testClassToSkip;
            Reason = reason;
        }
    }
}
|
||||
38
src/ParquetViewer.Tests/SkippableTestMethodAttribute.cs
Normal file
38
src/ParquetViewer.Tests/SkippableTestMethodAttribute.cs
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace ParquetViewer.Tests
{
    /// <summary>
    /// A test method attribute that honours <see cref="SkipWhenAttribute"/>: when the decorated
    /// method carries a <see cref="SkipWhenAttribute"/> whose class name matches the class the test
    /// is running under, an inconclusive result is returned instead of running the test.
    /// </summary>
    internal class SkippableTestMethodAttribute : TestMethodAttribute
    {
        public SkippableTestMethodAttribute([CallerFilePath] string callerFilePath = "", [CallerLineNumber] int callerLineNumber = -1)
            : base(callerFilePath, callerLineNumber)
        {
        }

        /// <summary>
        /// Returns a single inconclusive result when a matching <see cref="SkipWhenAttribute"/> is
        /// present on the method; otherwise defers to the base implementation.
        /// </summary>
        public override Task<TestResult[]> ExecuteAsync(ITestMethod testMethod)
        {
            var declaredSkips = testMethod.MethodInfo
                .GetCustomAttributes(typeof(SkipWhenAttribute), inherit: true)
                .Cast<SkipWhenAttribute>()
                .ToList();

            SkipWhenAttribute? matchingSkip = declaredSkips
                .FirstOrDefault(candidate => candidate.TestClassToSkip.FullName == testMethod.TestClassName);

            // No skip rule applies to the current test class: run the test normally.
            if (matchingSkip is null)
            {
                return base.ExecuteAsync(testMethod);
            }

            string reasonSuffix = matchingSkip.Reason is not null ? $" {matchingSkip.Reason}" : string.Empty;

            var skippedResult = new TestResult
            {
                Outcome = UnitTestOutcome.Inconclusive, // treated as skipped in MSTest
                TestFailureException = null
            };
            skippedResult.TestContextMessages =
                $"Test skipped for {testMethod.TestClassName}.{testMethod.TestMethodName}" + $"{reasonSuffix}.";

            return Task.FromResult(new[] { skippedResult });
        }
    }
}
|
||||
|
|
@ -40,4 +40,4 @@ namespace ParquetViewer.Tests
|
|||
public bool AnalyticsDataGatheringConsent => true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 17
|
||||
VisualStudioVersion = 17.4.33213.308
|
||||
# Visual Studio Version 18
|
||||
VisualStudioVersion = 18.1.11312.151
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParquetViewer", "ParquetViewer\ParquetViewer.csproj", "{6019FC1B-3610-4682-BF96-8345C95CB7EC}"
|
||||
EndProject
|
||||
|
|
@ -9,6 +9,17 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParquetViewer.Engine", "Par
|
|||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "ParquetViewer.Tests", "ParquetViewer.Tests\ParquetViewer.Tests.csproj", "{16D10BC9-08BF-4248-8975-1B54C42EB2C2}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParquetViewer.Engine.DuckDB", "ParquetViewer.Engine.DuckDB\ParquetViewer.Engine.DuckDB.csproj", "{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ParquetViewer.Engine.ParquetNET", "ParquetViewer.Engine.ParquetNET\ParquetViewer.Engine.ParquetNET.csproj", "{4B69AD86-BDF8-01E8-59B8-E690760BB827}"
|
||||
EndProject
|
||||
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution Items", "{F5D39637-1812-4802-8DB3-254CBBE5C313}"
|
||||
ProjectSection(SolutionItems) = preProject
|
||||
.editorconfig = .editorconfig
|
||||
..\.github\workflows\build-test-publish.yaml = ..\.github\workflows\build-test-publish.yaml
|
||||
..\.github\workflows\generate-translations-template.yaml = ..\.github\workflows\generate-translations-template.yaml
|
||||
EndProjectSection
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
|
|
@ -16,24 +27,36 @@ Global
|
|||
Release|Any CPU = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.ActiveCfg = Release_SelfContained|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.Build.0 = Release_SelfContained|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{6019FC1B-3610-4682-BF96-8345C95CB7EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{6019FC1B-3610-4682-BF96-8345C95CB7EC}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{6019FC1B-3610-4682-BF96-8345C95CB7EC}.Release_SelfContained|Any CPU.ActiveCfg = Release_SelfContained|Any CPU
|
||||
{6019FC1B-3610-4682-BF96-8345C95CB7EC}.Release_SelfContained|Any CPU.Build.0 = Release_SelfContained|Any CPU
|
||||
{6019FC1B-3610-4682-BF96-8345C95CB7EC}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{6019FC1B-3610-4682-BF96-8345C95CB7EC}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release_SelfContained|Any CPU.Build.0 = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{77900356-25F3-4A24-B638-845C784C1175}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Release_SelfContained|Any CPU.ActiveCfg = Release_SelfContained|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Release_SelfContained|Any CPU.Build.0 = Release_SelfContained|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{16D10BC9-08BF-4248-8975-1B54C42EB2C2}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release_SelfContained|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release_SelfContained|Any CPU.Build.0 = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{D00ACD9C-20B0-4E4A-8CC9-9DEC941D7747}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release_SelfContained|Any CPU.ActiveCfg = Release_SelfContained|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release_SelfContained|Any CPU.Build.0 = Release_SelfContained|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{4B69AD86-BDF8-01E8-59B8-E690760BB827}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
|
|
|||
|
|
@ -168,9 +168,9 @@ namespace ParquetViewer
|
|||
}
|
||||
else if (success == false)
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.FileAssociationFailedErrorMessageFormat.Format(exitCode),
|
||||
Resources.Errors.FileAssociationFailedErrorTitle,
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.FileAssociationFailedErrorMessageFormat.Format(exitCode),
|
||||
Resources.Errors.FileAssociationFailedErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
SetCheckboxSilent(!associateFileExtensionCheckBox.Checked);
|
||||
}
|
||||
|
|
@ -209,7 +209,7 @@ namespace ParquetViewer
|
|||
this.newVersionLabel.Image = Resources.Icons.external_link_icon;
|
||||
}
|
||||
else if (latestRelease.Version == Env.AssemblyVersion)
|
||||
{
|
||||
{
|
||||
this.newVersionLabel.Enabled = false;
|
||||
}
|
||||
}
|
||||
|
|
@ -291,4 +291,4 @@ namespace ParquetViewer
|
|||
Process.Start(new ProcessStartInfo(url.ToString()) { UseShellExecute = true });
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
using System;
|
||||
using ParquetViewer.Exceptions;
|
||||
using System;
|
||||
using System.Collections;
|
||||
using System.Collections.Generic;
|
||||
using System.Text.Json.Serialization;
|
||||
|
|
@ -23,6 +24,9 @@ namespace ParquetViewer.Analytics
|
|||
public long ReadTimeMS { get; set; }
|
||||
public long IndexTimeMS { get; set; }
|
||||
public long RenderTimeMS { get; set; }
|
||||
[JsonIgnore]
|
||||
public ParquetEngineTypeId EngineType { get; set; }
|
||||
public string EngineTypeName => EngineType.ToString();
|
||||
|
||||
public FileOpenEvent() : base(EVENT_TYPE)
|
||||
{
|
||||
|
|
@ -30,8 +34,8 @@ namespace ParquetViewer.Analytics
|
|||
}
|
||||
|
||||
public static void FireAndForget(bool isFolder, int numPartitions, long numRows, int numRowGroups, int numFields,
|
||||
string[] fieldTypes, long recordOffset, long recordCount, int numLoadedFields,
|
||||
long totalLoadTimeMilliseconds, long readTimeMS, long indexTimeMS, long renderTimeMS)
|
||||
string[] fieldTypes, long recordOffset, long recordCount, int numLoadedFields, long totalLoadTimeMilliseconds,
|
||||
long readTimeMS, long indexTimeMS, long renderTimeMS, ParquetEngineTypeId engineType)
|
||||
{
|
||||
var _ = new FileOpenEvent
|
||||
{
|
||||
|
|
@ -47,9 +51,16 @@ namespace ParquetViewer.Analytics
|
|||
LoadTimeMS = totalLoadTimeMilliseconds,
|
||||
ReadTimeMS = readTimeMS,
|
||||
IndexTimeMS = indexTimeMS,
|
||||
RenderTimeMS = renderTimeMS
|
||||
RenderTimeMS = renderTimeMS,
|
||||
EngineType = engineType,
|
||||
}.Record();
|
||||
}
|
||||
|
||||
/// <summary>
/// Identifies the parquet engine recorded on a file-open event
/// (see the EngineType / EngineTypeName properties).
/// </summary>
public enum ParquetEngineTypeId
|
||||
{
|
||||
ParquetNET,
|
||||
DuckDB
|
||||
}
|
||||
}
|
||||
|
||||
public class FileExportEvent : AmplitudeEvent
|
||||
|
|
@ -114,7 +125,7 @@ namespace ParquetViewer.Analytics
|
|||
AboutBox,
|
||||
UserGuide,
|
||||
DragDrop,
|
||||
LoadAllRows
|
||||
LoadAllRows,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -169,9 +180,18 @@ namespace ParquetViewer.Analytics
|
|||
this.Exception = ex ?? throw new ArgumentNullException(nameof(ex));
|
||||
}
|
||||
|
||||
public static void FireAndForget(System.Exception ex)
|
||||
public static void FireAndForget(Exception ex)
|
||||
{
|
||||
var _ = new ExceptionEvent(ex).Record();
|
||||
if (ex is RowsReadException rre)
|
||||
{
|
||||
//Record two separate exceptions for both parquet.net and duckdb
|
||||
var _ = new ExceptionEvent(rre.ParquetNetException).Record()
|
||||
.ContinueWith((_) => _ = new ExceptionEvent(rre.DuckDbException).Record());
|
||||
}
|
||||
else
|
||||
{
|
||||
var _ = new ExceptionEvent(ex).Record();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -210,10 +230,11 @@ namespace ParquetViewer.Analytics
|
|||
private const string EVENT_TYPE = "sql.execute";
|
||||
|
||||
public bool IsValid { get; set; }
|
||||
public int RecordCountTotal { get; set; }
|
||||
public int? RecordCountTotal { get; set; }
|
||||
public int? RecordCountFiltered { get; set; }
|
||||
public int ColumnCount { get; set; }
|
||||
public int? ColumnCount { get; set; }
|
||||
public long RunTimeMS { get; set; }
|
||||
public bool IsDuckDB { get; set; }
|
||||
|
||||
public ExecuteQueryEvent() : base(EVENT_TYPE)
|
||||
{
|
||||
|
|
@ -237,4 +258,4 @@ namespace ParquetViewer.Analytics
|
|||
var _ = new ColumnFormattedEvent(formatName).Record();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -20,4 +20,4 @@ namespace ParquetViewer.Analytics
|
|||
ConsentProvider = consentProvider;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
{
|
||||
public bool AnalyticsDataGatheringConsent => AppSettings.AnalyticsDataGatheringConsent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,4 +4,4 @@
|
|||
{
|
||||
public bool AnalyticsDataGatheringConsent { get; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -20,6 +20,7 @@ namespace ParquetViewer
|
|||
private const string CustomDateFormatKey = "CustomDateFormat";
|
||||
private const string DarkModeKey = "DarkMode";
|
||||
private const string UserSelectedCultureKey = "UserSelectedCulture";
|
||||
private const string QueryEditorZoomLevelKey = "QueryEditorZoomLevel";
|
||||
|
||||
public static DateFormat DateTimeDisplayFormat
|
||||
{
|
||||
|
|
@ -109,11 +110,17 @@ namespace ParquetViewer
|
|||
public static CultureInfo? UserSelectedCulture
|
||||
{
|
||||
get => ReadRegistryValue(UserSelectedCultureKey, out string? value) ?
|
||||
(UtilityMethods.TryParseCultureInfo(value, out CultureInfo? cultureInfo) ? cultureInfo : null)
|
||||
(UtilityMethods.TryParseCultureInfo(value, out CultureInfo? cultureInfo) ? cultureInfo : null)
|
||||
: null;
|
||||
set => SetRegistryValue(UserSelectedCultureKey, value?.ToString() ?? string.Empty);
|
||||
}
|
||||
|
||||
/// <summary>
/// Zoom level stored under the query editor zoom registry key.
/// Reads as <c>null</c> when no value could be read.
/// </summary>
public static int? QueryEditorZoomLevel
{
    get
    {
        if (ReadRegistryValue(QueryEditorZoomLevelKey, out int storedZoomLevel))
        {
            return storedZoomLevel;
        }

        return null;
    }
    set
    {
        SetRegistryValue(QueryEditorZoomLevelKey, value);
    }
}
|
||||
|
||||
private static bool ReadRegistryValue<T>(string key, [NotNullWhen(true)] out T? value)
|
||||
{
|
||||
try
|
||||
|
|
|
|||
|
|
@ -62,7 +62,7 @@ namespace ParquetViewer.Controls
|
|||
try
|
||||
{
|
||||
//Prepare audio stream
|
||||
if (this.Value is ByteArrayValue byteArray)
|
||||
if (this.Value is IByteArrayValue byteArray)
|
||||
{
|
||||
this._audioStream = GetAudioStream(byteArray.Data, out var audioFormat);
|
||||
this._audioFormat = audioFormat;
|
||||
|
|
@ -79,7 +79,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
else
|
||||
{
|
||||
throw new InvalidDataException($"{this.ValueType.Name} was not the expected type {nameof(ByteArrayValue)}");
|
||||
throw new InvalidDataException($"{this.ValueType.Name} was not the expected type {nameof(IByteArrayValue)}");
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
|
|
@ -112,7 +112,7 @@ namespace ParquetViewer.Controls
|
|||
this.RedrawCell();
|
||||
}
|
||||
|
||||
protected override void Paint(Graphics graphics, Rectangle clipBounds, Rectangle cellBounds, int rowIndex, DataGridViewElementStates cellState, object value, object formattedValue, string errorText, DataGridViewCellStyle cellStyle, DataGridViewAdvancedBorderStyle advancedBorderStyle, DataGridViewPaintParts paintParts)
|
||||
protected override void Paint(Graphics graphics, Rectangle clipBounds, Rectangle cellBounds, int rowIndex, DataGridViewElementStates cellState, object? value, object? formattedValue, string? errorText, DataGridViewCellStyle cellStyle, DataGridViewAdvancedBorderStyle advancedBorderStyle, DataGridViewPaintParts paintParts)
|
||||
{
|
||||
InitializePlayerAsync(); //Trigger initialization if it wasn't performed yet
|
||||
|
||||
|
|
@ -359,7 +359,7 @@ namespace ParquetViewer.Controls
|
|||
if (this.DataGridView is null) //just in case
|
||||
return;
|
||||
|
||||
if (this.Value is not ByteArrayValue byteArrayValue)
|
||||
if (this.Value is not IByteArrayValue byteArrayValue)
|
||||
return;
|
||||
|
||||
if (this._audioFormat is null || this._audioFormat == AudioFormat.Invalid)
|
||||
|
|
@ -380,7 +380,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
CleanupFile(saveFileDialog.FileName); //Delete any existing file (user already confirmed any overwrite)
|
||||
|
||||
if (this.Value is not ByteArrayValue byteArray)
|
||||
if (this.Value is not IByteArrayValue byteArray)
|
||||
throw new InvalidDataException("Audio data was not found");
|
||||
|
||||
await File.WriteAllBytesAsync(saveFileDialog.FileName, byteArray.Data);
|
||||
|
|
@ -484,7 +484,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public enum AudioFormat
|
||||
{
|
||||
Invalid,
|
||||
|
|
|
|||
|
|
@ -51,4 +51,4 @@ namespace ParquetViewer.Controls
|
|||
_tooltip.SetToolTip(this, text);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
using System;
|
||||
using System.ComponentModel;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace ParquetViewer.Controls
|
||||
|
|
@ -34,6 +35,7 @@ namespace ParquetViewer.Controls
|
|||
base.Dispose(disposing);
|
||||
}
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public int DelayedTextChangedTimeout { get; set; }
|
||||
|
||||
protected virtual void OnDelayedTextChanged(EventArgs e)
|
||||
|
|
@ -91,4 +93,4 @@ namespace ParquetViewer.Controls
|
|||
OnDelayedTextChanged(EventArgs.Empty);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -79,4 +79,4 @@ namespace ParquetViewer.Controls
|
|||
_openForms.Remove(this);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -4,6 +4,7 @@ using ParquetViewer.Engine.Types;
|
|||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.ComponentModel;
|
||||
using System.Data;
|
||||
using System.Drawing;
|
||||
using System.Linq;
|
||||
|
|
@ -22,6 +23,7 @@ namespace ParquetViewer.Controls
|
|||
const string FORMATTING_ERROR_TEXT = "#ERR";
|
||||
|
||||
private Theme _gridTheme = Theme.LightModeTheme;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Theme GridTheme
|
||||
{
|
||||
get => _gridTheme;
|
||||
|
|
@ -35,9 +37,16 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Image? CopyToClipboardIcon { get; set; } = null;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Image? CopyAsWhereIcon { get; set; } = null;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public bool ShowCopyAsWhereContextMenuItem { get; set; } = false;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public string ColumnNameEscapeFormat { get; set; } = "[{0}]";
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public string DateValueEscapeFormat { get; set; } = "#{0}#";
|
||||
|
||||
private readonly HashSet<int> clickableColumnIndexes = new();
|
||||
private readonly Dictionary<(int, int), QuickPeekForm> openQuickPeekForms = new();
|
||||
|
|
@ -45,10 +54,11 @@ namespace ParquetViewer.Controls
|
|||
private DataGridViewCellStyle? hyperlinkCellStyleCache;
|
||||
private bool isLeftClickButtonDown = false;
|
||||
private ContextMenuStrip? _contextMenu = null;
|
||||
private ContextMenuStrip? _headerContextMenu = null;
|
||||
private static readonly Regex _validColumnNameRegex = new Regex("^[a-zA-Z0-9_]+$");
|
||||
|
||||
//We keep track of format overrides with the column name so we can keep formatting the same if the user adds/removes fields from the same file
|
||||
private readonly Dictionary<string, ByteArrayValue.DisplayFormat> byteArrayColumnsWithFormatOverrides = new();
|
||||
private readonly Dictionary<string, IByteArrayValue.DisplayFormat> byteArrayColumnsWithFormatOverrides = new();
|
||||
private readonly Dictionary<string, FloatDisplayFormat> floatColumnsWithFormatOverrides = new();
|
||||
|
||||
public ParquetGridView() : base()
|
||||
|
|
@ -72,7 +82,6 @@ namespace ParquetViewer.Controls
|
|||
this.clickableColumnIndexes.Clear();
|
||||
base.OnDataSourceChanged(e); //This runs OnColumnAdded() for all columns before continuing.
|
||||
|
||||
UpdateDateFormats();
|
||||
SetColumnCellStyles();
|
||||
AutoSizeColumns();
|
||||
}
|
||||
|
|
@ -87,13 +96,13 @@ namespace ParquetViewer.Controls
|
|||
{
|
||||
checkboxColumn.ThreeState = true;
|
||||
}
|
||||
else if (column.ValueType == typeof(ListValue)
|
||||
|| column.ValueType == typeof(MapValue)
|
||||
|| column.ValueType == typeof(StructValue))
|
||||
else if (column.ValueType.ImplementsInterface<IListValue>()
|
||||
|| column.ValueType.ImplementsInterface<IMapValue>()
|
||||
|| column.ValueType.ImplementsInterface<IStructValue>())
|
||||
{
|
||||
column.DefaultCellStyle = GetHyperlinkCellStyle(column);
|
||||
}
|
||||
else if (column.ValueType == typeof(ByteArrayValue))
|
||||
else if (column.ValueType.ImplementsInterface<IByteArrayValue>())
|
||||
{
|
||||
//Check if this column contains images
|
||||
for (var i = 0; i < this.Rows.Count; i++)
|
||||
|
|
@ -101,7 +110,7 @@ namespace ParquetViewer.Controls
|
|||
var cellValue = this[column.Index, i].Value;
|
||||
if (cellValue != DBNull.Value)
|
||||
{
|
||||
var isImage = ((ByteArrayValue)cellValue).ToImage(out var image);
|
||||
var isImage = ((IByteArrayValue)cellValue!).ToImage(out var image);
|
||||
if (isImage)
|
||||
{
|
||||
column.DefaultCellStyle = GetHyperlinkCellStyle(column);
|
||||
|
|
@ -111,21 +120,36 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//Reset any changed stylings
|
||||
column.DefaultCellStyle = new DataGridViewCellStyle();
|
||||
}
|
||||
}
|
||||
|
||||
UpdateDateFormats();
|
||||
}
|
||||
|
||||
/// <summary>
/// Applies the date/time display formats from the application settings to every
/// DateTime, DateOnly and TimeOnly column of the grid, then passes the same formats
/// on to the parquet engine settings.
/// </summary>
public void UpdateDateFormats()
{
    string dateTimePattern = AppSettings.DateTimeDisplayFormat.GetDateFormat();
    string dateOnlyPattern = AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat();
    string timeOnlyPattern = AppSettings.DateTimeDisplayFormat.GetTimeOnlyFormat();

    foreach (DataGridViewColumn gridColumn in this.Columns)
    {
        string pattern;
        if (gridColumn.ValueType == typeof(DateTime))
        {
            pattern = dateTimePattern;
        }
        else if (gridColumn.ValueType == typeof(DateOnly))
        {
            pattern = dateOnlyPattern;
        }
        else if (gridColumn.ValueType == typeof(TimeOnly))
        {
            pattern = timeOnlyPattern;
        }
        else
        {
            // Columns of any other type keep their current format.
            continue;
        }

        gridColumn.DefaultCellStyle.Format = pattern;
    }

    //Need to tell the parquet engine how to render date values
    ParquetEngineSettings.DateDisplayFormat = dateTimePattern;
    ParquetEngineSettings.DateOnlyDisplayFormat = dateOnlyPattern;
    ParquetEngineSettings.TimeOnlyDisplayFormat = timeOnlyPattern;
}
|
||||
|
||||
protected override void OnCellPainting(DataGridViewCellPaintingEventArgs e)
|
||||
|
|
@ -140,8 +164,8 @@ namespace ParquetViewer.Controls
|
|||
e.PaintBackground(e.CellBounds, true);
|
||||
e.PaintContent(e.CellBounds);
|
||||
|
||||
WidenColumnForIndicator(this.Columns[e.ColumnIndex], e.Graphics!, e.CellStyle!.Font, false);
|
||||
var length = MeasureStringWidth(e.Graphics!, e.CellStyle.Font, e.FormattedValue?.ToString() ?? string.Empty, false);
|
||||
WidenColumnForIndicator(this.Columns[e.ColumnIndex], e.Graphics!, e.CellStyle!.Font!, false);
|
||||
var length = MeasureStringWidth(e.Graphics!, e.CellStyle.Font!, e.FormattedValue?.ToString() ?? string.Empty, false);
|
||||
var drawPoint = new Point(e.CellBounds.Left + length - 2, e.CellBounds.Y + 4);
|
||||
TextRenderer.DrawText(e.Graphics!, "*", e.CellStyle!.Font, drawPoint, e.CellStyle.ForeColor, TextFormatFlags.PreserveGraphicsClipping);
|
||||
|
||||
|
|
@ -156,7 +180,7 @@ namespace ParquetViewer.Controls
|
|||
e.Paint(e.CellBounds, DataGridViewPaintParts.All
|
||||
& ~(DataGridViewPaintParts.ContentForeground));
|
||||
|
||||
var font = new Font(e.CellStyle!.Font, FontStyle.Italic);
|
||||
var font = new Font(e.CellStyle!.Font!, FontStyle.Italic);
|
||||
var color = this.GridTheme.CellPlaceholderTextColor;
|
||||
if (e.State.HasFlag(DataGridViewElementStates.Selected))
|
||||
color = Color.White;
|
||||
|
|
@ -250,15 +274,31 @@ namespace ParquetViewer.Controls
|
|||
int columnIndex = this.HitTest(e.X, e.Y).ColumnIndex;
|
||||
|
||||
if (rowIndex >= 0 && columnIndex >= 0
|
||||
&& this[columnIndex, rowIndex].Value is StructValue structValue
|
||||
&& structValue.IsHuggingFaceImageFormat(out var data))
|
||||
&& this[columnIndex, rowIndex].Value is IStructValue structValue
|
||||
&& structValue.IsHuggingFaceFormat(out var data))
|
||||
{
|
||||
using var ms = new System.IO.MemoryStream(data);
|
||||
var image = Image.FromStream(ms); //quick peek form will dispose of this image when closed
|
||||
Image? image;
|
||||
try
|
||||
{
|
||||
using var ms = new System.IO.MemoryStream(data);
|
||||
image = Image.FromStream(ms); //quick peek form will dispose of this image when closed
|
||||
}
|
||||
catch (ArgumentException)
|
||||
{
|
||||
//Data is not an image
|
||||
image = null;
|
||||
}
|
||||
catch
|
||||
{
|
||||
throw;
|
||||
}
|
||||
|
||||
var uniqueCellTag = Guid.NewGuid();
|
||||
var quickPeekForm = new QuickPeekForm(this.Columns[columnIndex].Name, image, uniqueCellTag, rowIndex, columnIndex);
|
||||
ShowQuickPeekForm(quickPeekForm, this[columnIndex, rowIndex], uniqueCellTag, QuickPeekEvent.DataTypeId.Image);
|
||||
if (image is not null)
|
||||
{
|
||||
var uniqueCellTag = Guid.NewGuid();
|
||||
var quickPeekForm = new QuickPeekForm(this.Columns[columnIndex].Name, image, uniqueCellTag, rowIndex, columnIndex);
|
||||
ShowQuickPeekForm(quickPeekForm, this[columnIndex, rowIndex], uniqueCellTag, QuickPeekEvent.DataTypeId.Image);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -314,7 +354,7 @@ namespace ParquetViewer.Controls
|
|||
var dataType = QuickPeekEvent.DataTypeId.Unknown;
|
||||
QuickPeekForm? quickPeekForm = null;
|
||||
var uniqueCellTag = Guid.NewGuid();
|
||||
if (clickedCell.Value is ListValue listValue)
|
||||
if (clickedCell.Value is IListValue listValue)
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.List;
|
||||
|
||||
|
|
@ -330,7 +370,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, dt, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
}
|
||||
else if (clickedCell.Value is MapValue mapValue)
|
||||
else if (clickedCell.Value is IMapValue mapValue)
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.Map;
|
||||
|
||||
|
|
@ -348,15 +388,14 @@ namespace ParquetViewer.Controls
|
|||
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, dt, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
}
|
||||
else if (clickedCell.Value is StructValue structValue)
|
||||
else if (clickedCell.Value is IStructValue structValue)
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.Struct;
|
||||
|
||||
|
||||
var dt = structValue.ToDataTable();
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, dt, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
}
|
||||
else if (clickedCell.Value is ByteArrayValue byteArray && byteArray.ToImage(out var image))
|
||||
else if (clickedCell.Value is IByteArrayValue byteArray && byteArray.ToImage(out var image))
|
||||
{
|
||||
dataType = QuickPeekEvent.DataTypeId.Image;
|
||||
quickPeekForm = new QuickPeekForm(this.Columns[e.ColumnIndex].Name, image!, uniqueCellTag, e.RowIndex, e.ColumnIndex);
|
||||
|
|
@ -389,7 +428,8 @@ namespace ParquetViewer.Controls
|
|||
|
||||
this.ClearSelection();
|
||||
this.FirstDisplayedScrollingRowIndex = cellToReturnTo.RowIndex;
|
||||
this.FirstDisplayedScrollingColumnIndex = tag.SourceColumnIndex;
|
||||
if (!this.Columns[tag.SourceColumnIndex].Frozen)
|
||||
this.FirstDisplayedScrollingColumnIndex = tag.SourceColumnIndex;
|
||||
this[cellToReturnTo.ColumnIndex, cellToReturnTo.RowIndex].Selected = true;
|
||||
this.CurrentCell = cellToReturnTo;
|
||||
this.Focus();
|
||||
|
|
@ -496,7 +536,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
|
||||
if (cellValueType == typeof(ByteArrayValue) && e.Value is ByteArrayValue byteArrayValue)
|
||||
if (cellValueType.ImplementsInterface<IByteArrayValue>() && e.Value is IByteArrayValue byteArrayValue)
|
||||
{
|
||||
//Don't truncate the binary data if this is a copy to clipboard operation
|
||||
int charLimit = this.isCopyingToClipboard ? int.MaxValue : MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL;
|
||||
|
|
@ -528,22 +568,25 @@ namespace ParquetViewer.Controls
|
|||
e.FormattingApplied = true;
|
||||
}
|
||||
}
|
||||
else if (cellValueType == typeof(StructValue) && e.Value is StructValue structValue)
|
||||
else if (cellValueType.ImplementsInterface<IStructValue>() && e.Value is IStructValue structValue)
|
||||
{
|
||||
e.Value = structValue.ToStringTruncated(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL);
|
||||
e.FormattingApplied = true;
|
||||
}
|
||||
else if (cellValueType == typeof(ListValue) && e.Value is ListValue listValue)
|
||||
else if (cellValueType.ImplementsInterface<IListValue>() && e.Value is IListValue listValue)
|
||||
{
|
||||
e.Value = listValue.ToString().Left(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL - 3, "...");
|
||||
e.Value = listValue.ToString()!.Left(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL - 3, "...");
|
||||
e.FormattingApplied = true;
|
||||
}
|
||||
}
|
||||
|
||||
protected override void OnSorted(EventArgs e)
|
||||
{
|
||||
using var graphics = this.CreateGraphics();
|
||||
WidenColumnForIndicator(this.SortedColumn, graphics, this.Font, true);
|
||||
if (this.SortedColumn is not null)
|
||||
{
|
||||
using var graphics = this.CreateGraphics();
|
||||
WidenColumnForIndicator(this.SortedColumn, graphics, this.Font, true);
|
||||
}
|
||||
base.OnSorted(e);
|
||||
}
|
||||
|
||||
|
|
@ -568,14 +611,23 @@ namespace ParquetViewer.Controls
|
|||
|
||||
protected override void OnColumnHeaderMouseClick(DataGridViewCellMouseEventArgs e)
|
||||
{
|
||||
this.Cursor = Cursors.WaitCursor;
|
||||
if (e.Button == MouseButtons.Left)
|
||||
this.Cursor = Cursors.WaitCursor;
|
||||
|
||||
try
|
||||
{
|
||||
base.OnColumnHeaderMouseClick(e); //This will trigger the sort operation and the OnSorted event if it's a left-click
|
||||
|
||||
if (e.Button == MouseButtons.Right)
|
||||
{
|
||||
ShowDisplayFormatOptions(e.ColumnIndex);
|
||||
this._headerContextMenu?.Dispose();
|
||||
this._headerContextMenu = new ContextMenuStrip();
|
||||
|
||||
AddFrozenOption(this._headerContextMenu.Items, e.ColumnIndex);
|
||||
AddDisplayFormatOptions(this._headerContextMenu.Items, e.ColumnIndex);
|
||||
|
||||
if (this._headerContextMenu.Items.Count > 0)
|
||||
this._headerContextMenu.Show(Cursor.Position);
|
||||
}
|
||||
}
|
||||
finally
|
||||
|
|
@ -651,7 +703,7 @@ namespace ParquetViewer.Controls
|
|||
const int MAX_WIDTH = 360;
|
||||
const int DECIMAL_PREFERRED_WIDTH = 180;
|
||||
|
||||
if (this.DataSource is not DataTable gridTable)
|
||||
if (this.DataSource is not DataTable gridTable || this.Columns.Count == 0)
|
||||
return;
|
||||
|
||||
var maxWidth = MAX_WIDTH;
|
||||
|
|
@ -683,12 +735,28 @@ namespace ParquetViewer.Controls
|
|||
//We can just measure a few without going through all of them.
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => row.Field<DateTime>(i).ToString(AppSettings.DateTimeDisplayFormat.GetDateFormat()))
|
||||
.Take(100);
|
||||
.Take(25);
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType == typeof(StructValue))
|
||||
else if (gridTable.Columns[i].DataType == typeof(DateOnly))
|
||||
{
|
||||
//All date only's will probably have the same string length so no need to go through all values.
|
||||
//We can just measure a few without going through all of them.
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => row.Field<DateOnly>(i).ToString(AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat()))
|
||||
.Take(10);
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType == typeof(TimeOnly))
|
||||
{
|
||||
//All time only's will probably have the same string length so no need to go through all values.
|
||||
//We can just measure a few without going through all of them.
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => row.Field<TimeOnly>(i).ToString(AppSettings.DateTimeDisplayFormat.GetTimeOnlyFormat()))
|
||||
.Take(25);
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType.ImplementsInterface<IStructValue>())
|
||||
{
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => row.Field<StructValue>(i)!.ToStringTruncated(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL));
|
||||
.Select(row => row.Field<IStructValue>(i)!.ToStringTruncated(MAX_CHARACTERS_THAT_CAN_BE_RENDERED_IN_A_CELL));
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType == typeof(float)
|
||||
&& this.floatColumnsWithFormatOverrides.TryGetValue(gridTable.Columns[i].ColumnName, out var displayFormat)
|
||||
|
|
@ -720,16 +788,16 @@ namespace ParquetViewer.Controls
|
|||
//Allow longer than preferred width if header is longer
|
||||
maxWidth = Math.Max(newColumnSize, DECIMAL_PREFERRED_WIDTH);
|
||||
}
|
||||
else if (this.Columns[i].CellTemplate.GetType() == typeof(AudioPlayerDataGridViewCell))
|
||||
else if (this.Columns[i].CellTemplate!.GetType() == typeof(AudioPlayerDataGridViewCell))
|
||||
{
|
||||
this.Columns[i].Width = Math.Min(Math.Max(240, newColumnSize), maxWidth);
|
||||
return;
|
||||
}
|
||||
else if (gridTable.Columns[i].DataType == typeof(ByteArrayValue)
|
||||
else if (gridTable.Columns[i].DataType.ImplementsInterface<IByteArrayValue>()
|
||||
&& this.byteArrayColumnsWithFormatOverrides.TryGetValue(gridTable.Columns[i].ColumnName, out var byteArrayDisplayFormat))
|
||||
{
|
||||
colStringCollection = nonNullColumnValues
|
||||
.Select(row => FormatByteArrayString(row.Field<ByteArrayValue>(i)!, byteArrayDisplayFormat, 1000 /*1000 chars seems like a good max limit*/));
|
||||
.Select(row => FormatByteArrayString(row.Field<IByteArrayValue>(i)!, byteArrayDisplayFormat, 1000 /*1000 chars seems like a good max limit*/));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -824,7 +892,7 @@ namespace ParquetViewer.Controls
|
|||
this.clickableColumnIndexes.Add(column.Index);
|
||||
this.hyperlinkCellStyleCache ??= new DataGridViewCellStyle(column.DefaultCellStyle)
|
||||
{
|
||||
Font = new(column.DefaultCellStyle.Font ?? column.InheritedStyle.Font, FontStyle.Underline),
|
||||
Font = new(column.DefaultCellStyle.Font ?? column.InheritedStyle!.Font!, FontStyle.Underline),
|
||||
ForeColor = this.GridTheme.HyperlinkColor
|
||||
};
|
||||
return this.hyperlinkCellStyleCache;
|
||||
|
|
@ -836,9 +904,6 @@ namespace ParquetViewer.Controls
|
|||
this.DefaultCellStyle.ForeColor = this.GridTheme.TextColor;
|
||||
this.DefaultCellStyle.SelectionBackColor = this.GridTheme.SelectionBackColor;
|
||||
|
||||
this.ColumnHeadersDefaultCellStyle.BackColor = this.GridTheme.ColumnHeaderColor;
|
||||
this.ColumnHeadersDefaultCellStyle.ForeColor = this.GridTheme.TextColor;
|
||||
|
||||
this.RowHeadersDefaultCellStyle.BackColor = this.GridTheme.RowHeaderColor;
|
||||
this.RowHeadersDefaultCellStyle.ForeColor = this.GridTheme.TextColor;
|
||||
this.RowHeadersDefaultCellStyle.SelectionBackColor = this.GridTheme.SelectionBackColor;
|
||||
|
|
@ -858,6 +923,7 @@ namespace ParquetViewer.Controls
|
|||
WrapMode = DataGridViewTriState.True
|
||||
};
|
||||
|
||||
StyleFrozenColumns();
|
||||
SetColumnCellStyles();
|
||||
}
|
||||
|
||||
|
|
@ -880,23 +946,23 @@ namespace ParquetViewer.Controls
|
|||
.GroupBy(cell => cell.ColumnIndex)
|
||||
.OrderBy(column => column.Key))
|
||||
{
|
||||
DataTable? dataTable = this.DataSource as DataTable;
|
||||
var cellValues = selectedCellsByColumn.Select(cell => this[cell.ColumnIndex, cell.RowIndex].Value);
|
||||
var cellValues = selectedCellsByColumn.Select(cell => this[cell.ColumnIndex, cell.RowIndex].Value!);
|
||||
var columnIndex = selectedCellsByColumn.Key;
|
||||
var column = this.Columns[columnIndex];
|
||||
columnsAndValuesToFilterBy.Add((column.Name, column.ValueType, cellValues.ToArray()));
|
||||
columnsAndValuesToFilterBy.Add((column.Name, column.ValueType!, cellValues.ToArray()));
|
||||
}
|
||||
|
||||
var filterQuery = GenerateFilterQuery(columnsAndValuesToFilterBy);
|
||||
if (filterQuery.Length < new TextBox().MaxLength) //This length check doesn't make the most sense but I wanted to put some kind of cap on this.
|
||||
var filterQuery = GenerateFilterQuery(columnsAndValuesToFilterBy, this.ColumnNameEscapeFormat, this.DateValueEscapeFormat);
|
||||
if (filterQuery.Length < new TextBox().MaxLength)
|
||||
{
|
||||
Clipboard.SetText(filterQuery, TextDataFormat.Text);
|
||||
}
|
||||
else
|
||||
{
|
||||
//If the query is too long to fit in our query box, show an error
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.CopyAsWhereTooLargeErrorMessage,
|
||||
Resources.Errors.CopyAsWhereTooLargeErrorTitle,
|
||||
Resources.Errors.CopyAsWhereTooLargeErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
}
|
||||
}
|
||||
|
|
@ -904,8 +970,14 @@ namespace ParquetViewer.Controls
|
|||
/// <summary>
/// Convenience overload that builds a filter query for a single column and a single value
/// by delegating to the list-based overload.
/// </summary>
/// <param name="columnName">Name of the column to filter on.</param>
/// <param name="valueType">Type of the column's values.</param>
/// <param name="value">The one value to filter by.</param>
/// <returns>The query text produced by the list-based overload.</returns>
public static string GenerateFilterQuery(string columnName, Type valueType, object value)
{
    var singleColumnFilter = new List<(string ColumnName, Type ValueType, object[] Values)>
    {
        (columnName, valueType, new[] { value })
    };

    return GenerateFilterQuery(singleColumnFilter);
}
|
||||
|
||||
public static string GenerateFilterQuery(List<(string ColumnName, Type ValueType, object[] Values)> columnsAndValuesToFilterBy)
|
||||
public static string GenerateFilterQuery(List<(string ColumnName, Type ValueType, object[] Values)> columnsAndValuesToFilterBy,
|
||||
string columnNameEscapeFormat = "[{0}]", string dateValueEscapeFormat = "#{0}#")
|
||||
{
|
||||
if (columnNameEscapeFormat.Length < 5)
|
||||
throw new ArgumentException("Column name escape format is too short.", nameof(columnNameEscapeFormat));
|
||||
if (dateValueEscapeFormat.Length < 5)
|
||||
throw new ArgumentException("Date value escape format is too short.", nameof(dateValueEscapeFormat));
|
||||
|
||||
var queryBuilder = new StringBuilder();
|
||||
if (columnsAndValuesToFilterBy is null || columnsAndValuesToFilterBy.Count == 0)
|
||||
{
|
||||
|
|
@ -920,10 +992,10 @@ namespace ParquetViewer.Controls
|
|||
ArgumentNullException.ThrowIfNull(values);
|
||||
|
||||
//Wrap column name in brackets if it contains spaces or punctuation (if it isn't wrapped already)
|
||||
var isAlreadyWrapped = columnName.StartsWith("[") && columnName.EndsWith("]");
|
||||
var isAlreadyWrapped = columnName.StartsWith(columnNameEscapeFormat.First()) && columnName.EndsWith(columnNameEscapeFormat.Last());
|
||||
if (!isAlreadyWrapped && !_validColumnNameRegex.IsMatch(columnName))
|
||||
{
|
||||
columnName = $"[{columnName}]";
|
||||
columnName = string.Format(columnNameEscapeFormat, columnName);
|
||||
}
|
||||
|
||||
var hasNulls = values.Any(value => value == DBNull.Value || value is null);
|
||||
|
|
@ -977,7 +1049,7 @@ namespace ParquetViewer.Controls
|
|||
if (valueType == typeof(DateTime))
|
||||
{
|
||||
//Use a standard date format so the query is always syntactically correct
|
||||
queryBuilder.Append($"#{((DateTime)value).ToString("yyyy-MM-dd HH:mm:ss.FFFFFFF")}#");
|
||||
queryBuilder.AppendFormat(dateValueEscapeFormat, ((DateTime)value).ToString("yyyy-MM-dd HH:mm:ss.FFFFFFF"));
|
||||
}
|
||||
else if (valueType.IsNumber())
|
||||
{
|
||||
|
|
@ -1011,21 +1083,22 @@ namespace ParquetViewer.Controls
|
|||
return queryBuilder.ToString();
|
||||
}
|
||||
|
||||
private void ShowDisplayFormatOptions(int columnIndex)
|
||||
private void AddDisplayFormatOptions(ToolStripItemCollection contextMenu, int columnIndex)
|
||||
{
|
||||
//If this is a byte array column, show available formatting options
|
||||
if (this.Columns[columnIndex].ValueType == typeof(ByteArrayValue)
|
||||
&& this.Columns[columnIndex].CellTemplate.GetType() != typeof(AudioPlayerDataGridViewCell))
|
||||
if (this.Columns[columnIndex].ValueType.ImplementsInterface<IByteArrayValue>()
|
||||
&& this.Columns[columnIndex].CellTemplate?.GetType() != typeof(AudioPlayerDataGridViewCell))
|
||||
{
|
||||
AddSeperatorIfNeeded();
|
||||
const int RECORDS_TO_INTERSECT_COUNT = 8;
|
||||
|
||||
//Find a few different non-null values and find the common display formats that all of them support.
|
||||
//This will reduce the chance the user sees #ERR in the cells from bad formatting conversions.
|
||||
int intersectCounter = RECORDS_TO_INTERSECT_COUNT;
|
||||
IEnumerable<ByteArrayValue.DisplayFormat> possibleDisplayFormats = Enum.GetValues<ByteArrayValue.DisplayFormat>();
|
||||
IEnumerable<IByteArrayValue.DisplayFormat> possibleDisplayFormats = Enum.GetValues<IByteArrayValue.DisplayFormat>();
|
||||
for (var i = 0; i < this.RowCount; i++)
|
||||
{
|
||||
if (this[columnIndex, i].Value is not ByteArrayValue byteArrayValue)
|
||||
if (this[columnIndex, i].Value is not IByteArrayValue byteArrayValue)
|
||||
continue;
|
||||
|
||||
possibleDisplayFormats = possibleDisplayFormats.Intersect(byteArrayValue.PossibleDisplayFormats);
|
||||
|
|
@ -1041,7 +1114,6 @@ namespace ParquetViewer.Controls
|
|||
possibleDisplayFormats = [default];
|
||||
}
|
||||
|
||||
var columnHeaderContextMenu = new ContextMenuStrip();
|
||||
foreach (var supportedFormat in possibleDisplayFormats)
|
||||
{
|
||||
var columnName = this.Columns[columnIndex].Name;
|
||||
|
|
@ -1057,20 +1129,17 @@ namespace ParquetViewer.Controls
|
|||
this.Refresh(); //Force a re-draw to render updated format
|
||||
this.AutoSizeColumns(columnIndex); //Re-size the column
|
||||
};
|
||||
columnHeaderContextMenu.Items.Add(toolstripMenuItem);
|
||||
contextMenu.Add(toolstripMenuItem);
|
||||
|
||||
if (!byteArrayColumnsWithFormatOverrides.TryGetValue(columnName, out var displayFormat))
|
||||
displayFormat = default;
|
||||
|
||||
toolstripMenuItem.Checked = displayFormat == supportedFormat;
|
||||
}
|
||||
|
||||
columnHeaderContextMenu.Show(Cursor.Position);
|
||||
}
|
||||
else if (this.Columns[columnIndex].ValueType == typeof(float) || this.Columns[columnIndex].ValueType == typeof(double))
|
||||
{
|
||||
var columnHeaderContextMenu = new ContextMenuStrip();
|
||||
|
||||
AddSeperatorIfNeeded();
|
||||
var columnName = this.Columns[columnIndex].Name;
|
||||
if (!floatColumnsWithFormatOverrides.TryGetValue(columnName, out var displayFormat))
|
||||
displayFormat = default;
|
||||
|
|
@ -1089,7 +1158,7 @@ namespace ParquetViewer.Controls
|
|||
this.Refresh(); //Force a re-draw to render updated format
|
||||
this.AutoSizeColumns(columnIndex); //Re-size the column
|
||||
};
|
||||
columnHeaderContextMenu.Items.Add(scientificNotationMenuItem);
|
||||
contextMenu.Add(scientificNotationMenuItem);
|
||||
|
||||
var decimalNotationMenuItem = new ToolStripMenuItem(Resources.Strings.DecimalFormatting)
|
||||
{ Checked = displayFormat == FloatDisplayFormat.Decimal };
|
||||
|
|
@ -1105,9 +1174,56 @@ namespace ParquetViewer.Controls
|
|||
this.Refresh(); //Force a re-draw to render updated format
|
||||
this.AutoSizeColumns(columnIndex); //Re-size the column
|
||||
};
|
||||
columnHeaderContextMenu.Items.Add(decimalNotationMenuItem);
|
||||
contextMenu.Add(decimalNotationMenuItem);
|
||||
}
|
||||
|
||||
columnHeaderContextMenu.Show(Cursor.Position);
|
||||
void AddSeperatorIfNeeded()
|
||||
{
|
||||
if (contextMenu.Count > 0)
|
||||
contextMenu.Add(new ToolStripSeparator());
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Appends a checkable "frozen column" entry for the given column to <paramref name="items"/>.
/// Nothing is added when the column is not frozen and the horizontal scroll bar is hidden.
/// </summary>
/// <param name="items">Menu item collection that receives the new entry.</param>
/// <param name="columnIndex">Index of the column the entry toggles.</param>
private void AddFrozenOption(ToolStripItemCollection items, int columnIndex)
{
    var targetColumn = this.Columns[columnIndex];

    //Offer the toggle only if the column is already frozen or the horizontal scroll bar is showing
    bool shouldOfferToggle = targetColumn.Frozen || this.HorizontalScrollBar.Visible;
    if (shouldOfferToggle)
    {
        var freezeToggleItem = new ToolStripMenuItem(Resources.Strings.FrozenColumnText);
        freezeToggleItem.Checked = targetColumn.Frozen;

        //Clicking flips the frozen state and then re-applies the frozen styling
        freezeToggleItem.Click += (_, _) =>
        {
            targetColumn.Frozen = !targetColumn.Frozen;
            this.StyleFrozenColumns();
        };

        items.Add(freezeToggleItem);
    }
}
|
||||
|
||||
/// <summary>
/// Re-applies frozen-column colors. Every header style is first replaced with a fresh
/// <see cref="DataGridViewCellStyle"/>, cell styles are reset through <c>SetColumnCellStyles</c>,
/// and then the leading run of frozen columns (in display order) gets the theme's frozen colors.
/// </summary>
private void StyleFrozenColumns()
{
    //Start from a clean slate for all headers
    foreach (DataGridViewColumn anyColumn in this.Columns)
    {
        anyColumn.HeaderCell.Style = new DataGridViewCellStyle();
    }

    //Start from a clean slate for the cells as well
    SetColumnCellStyles();

    //Walk columns by DisplayIndex (the user may have re-arranged them) and stop at the first one that isn't frozen
    var leadingFrozenColumns = this.Columns.AsEnumerable()
        .OrderBy(col => col.DisplayIndex)
        .TakeWhile(col => col.Frozen);

    foreach (var frozenColumn in leadingFrozenColumns)
    {
        frozenColumn.DefaultCellStyle.BackColor = this.GridTheme.FrozenCellBackgroundColor;
        frozenColumn.HeaderCell.Style.BackColor = this.GridTheme.FrozenColumnHeaderColor;
    }
}
|
||||
|
||||
|
|
@ -1119,12 +1235,12 @@ namespace ParquetViewer.Controls
|
|||
/// <returns>String representation of the binary data in the desired format if possible.
|
||||
/// If conversion fails, <see cref="FORMATTING_ERROR_TEXT"/> is returned instead</returns>
|
||||
/// <remarks>Utilize <see cref="ByteArrayValue.PossibleDisplayFormats"/> to avoid calling incompatible conversions</remarks>
|
||||
private static string FormatByteArrayString(ByteArrayValue byteArrayValue, ByteArrayValue.DisplayFormat desiredFormat, int desiredLength = int.MaxValue)
|
||||
private static string FormatByteArrayString(IByteArrayValue byteArrayValue, IByteArrayValue.DisplayFormat desiredFormat, int desiredLength = int.MaxValue)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(byteArrayValue);
|
||||
ArgumentOutOfRangeException.ThrowIfLessThan(desiredLength, 1);
|
||||
|
||||
if (desiredFormat == ByteArrayValue.DisplayFormat.IPv4)
|
||||
if (desiredFormat == IByteArrayValue.DisplayFormat.IPv4)
|
||||
{
|
||||
if (byteArrayValue.ToIPv4(out var ipAddress))
|
||||
{
|
||||
|
|
@ -1133,7 +1249,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.IPv6)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.IPv6)
|
||||
{
|
||||
if (byteArrayValue.ToIPv6(out var ipAddress))
|
||||
{
|
||||
|
|
@ -1142,7 +1258,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Guid)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Guid)
|
||||
{
|
||||
if (byteArrayValue.ToGuid(out var @guid))
|
||||
{
|
||||
|
|
@ -1151,7 +1267,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Short)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Short)
|
||||
{
|
||||
if (byteArrayValue.ToShort(out var @short))
|
||||
{
|
||||
|
|
@ -1160,7 +1276,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Integer)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Integer)
|
||||
{
|
||||
if (byteArrayValue.ToInteger(out var @int))
|
||||
{
|
||||
|
|
@ -1169,7 +1285,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Long)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Long)
|
||||
{
|
||||
if (byteArrayValue.ToLong(out var @long))
|
||||
{
|
||||
|
|
@ -1178,7 +1294,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Float)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Float)
|
||||
{
|
||||
if (byteArrayValue.ToFloat(out var @float))
|
||||
{
|
||||
|
|
@ -1187,7 +1303,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Double)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Double)
|
||||
{
|
||||
if (byteArrayValue.ToDouble(out var @double))
|
||||
{
|
||||
|
|
@ -1196,7 +1312,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.ASCII)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.ASCII)
|
||||
{
|
||||
if (byteArrayValue.ToASCII(out var ascii))
|
||||
{
|
||||
|
|
@ -1208,7 +1324,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return FORMATTING_ERROR_TEXT;
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Base64)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Base64)
|
||||
{
|
||||
byteArrayValue.ToBase64(out var base64);
|
||||
if (base64.Length <= desiredLength)
|
||||
|
|
@ -1216,7 +1332,7 @@ namespace ParquetViewer.Controls
|
|||
|
||||
return base64[..desiredLength] + "[...]";
|
||||
}
|
||||
else if (desiredFormat == ByteArrayValue.DisplayFormat.Size)
|
||||
else if (desiredFormat == IByteArrayValue.DisplayFormat.Size)
|
||||
{
|
||||
return byteArrayValue.Data.Length.ToString() + (byteArrayValue.Data.Length == 1 ? " byte" : " bytes");
|
||||
}
|
||||
|
|
@ -1234,7 +1350,7 @@ namespace ParquetViewer.Controls
|
|||
//Check for audio data
|
||||
foreach (DataGridViewColumn column in this.Columns)
|
||||
{
|
||||
if (column.ValueType == typeof(ByteArrayValue))
|
||||
if (column.ValueType.ImplementsInterface<IByteArrayValue>())
|
||||
{
|
||||
var isAudioColumn = false;
|
||||
var tryCount = 0;
|
||||
|
|
@ -1247,8 +1363,9 @@ namespace ParquetViewer.Controls
|
|||
if (value == DBNull.Value)
|
||||
continue;
|
||||
|
||||
byte[] data = ((ByteArrayValue)value).Data;
|
||||
if (AudioPlayerDataGridViewCell.IsAudio(data, out var _))
|
||||
var byteArray = (IByteArrayValue)value;
|
||||
if (AudioPlayerDataGridViewCell.IsAudio(byteArray.Data, out var _)
|
||||
&& !byteArray.ToImage(out _)) //help prevent false positives by checking for image data
|
||||
{
|
||||
isAudioColumn = true;
|
||||
break;
|
||||
|
|
@ -1272,7 +1389,7 @@ namespace ParquetViewer.Controls
|
|||
public void DisposeAudioCells()
|
||||
{
|
||||
foreach (var audioColumn in this.Columns.Cast<DataGridViewColumn>()
|
||||
.Where(column => column.CellTemplate.GetType() == typeof(AudioPlayerDataGridViewCell)))
|
||||
.Where(column => column.CellTemplate?.GetType() == typeof(AudioPlayerDataGridViewCell)))
|
||||
{
|
||||
foreach (DataGridViewRow row in this.Rows)
|
||||
{
|
||||
|
|
@ -1287,6 +1404,9 @@ namespace ParquetViewer.Controls
|
|||
//dispose any AudioPlayerDataGridViewCells to free resources and stop ongoing playback.
|
||||
this.DisposeAudioCells();
|
||||
|
||||
this._contextMenu?.Dispose();
|
||||
this._headerContextMenu?.Dispose();
|
||||
|
||||
base.Dispose(disposing);
|
||||
}
|
||||
|
||||
|
|
@ -1296,4 +1416,4 @@ namespace ParquetViewer.Controls
|
|||
Decimal
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.ComponentModel;
|
||||
using System.Data;
|
||||
using System.Drawing;
|
||||
using System.Threading.Tasks;
|
||||
|
|
@ -12,6 +13,7 @@ namespace ParquetViewer.Controls
|
|||
private readonly string originalTitle = string.Empty;
|
||||
|
||||
private string titleSuffix = string.Empty;
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public string TitleSuffix
|
||||
{
|
||||
get => titleSuffix;
|
||||
|
|
@ -30,8 +32,11 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public Guid UniqueTag { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public int SourceRowIndex { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public int SourceColumnIndex { get; set; }
|
||||
|
||||
public event EventHandler<TakeMeBackEventArgs>? TakeMeBackEvent;
|
||||
|
|
@ -86,6 +91,11 @@ namespace ParquetViewer.Controls
|
|||
{
|
||||
width += column.Width;
|
||||
}
|
||||
if (this.mainGridView.Rows.Count > 8) //8 is a magic number... Better than nothing imo
|
||||
{
|
||||
width += 24; //widen for scrollbar
|
||||
}
|
||||
|
||||
this.Width = Math.Min(Math.Max(width, 280), 900); //900 pixel max seems reasonable, right?
|
||||
|
||||
if (this.mainGridView.Rows.Count == 1)
|
||||
|
|
@ -95,7 +105,7 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
else if (this.mainPictureBox is not null)
|
||||
{
|
||||
this.Text += $" ({Resources.Strings.DimensionsText}: {this.mainPictureBox.Image.PhysicalDimension.Width} x {this.mainPictureBox.Image.PhysicalDimension.Height})";
|
||||
this.Text += $" ({Resources.Strings.DimensionsText}: {this.mainPictureBox.Image!.PhysicalDimension.Width} x {this.mainPictureBox.Image.PhysicalDimension.Height})";
|
||||
this.Text += $" ({Resources.Strings.TypeText}: {this.mainPictureBox.Image.RawFormat})";
|
||||
|
||||
this.Width = Math.Max(Math.Min((int)(Screen.FromControl(this).WorkingArea.Width / 1.8), this.mainPictureBox.Image.Width), 400);
|
||||
|
|
@ -142,7 +152,7 @@ namespace ParquetViewer.Controls
|
|||
{
|
||||
using var saveFileDialog = new SaveFileDialog
|
||||
{
|
||||
Filter = $"{this.mainPictureBox.Image.RawFormat.ToString().ToUpperInvariant()} image|*.{this.mainPictureBox.Image.RawFormat.ToString().ToLowerInvariant()}",
|
||||
Filter = $"{this.mainPictureBox.Image!.RawFormat.ToString().ToUpperInvariant()} image|*.{this.mainPictureBox.Image.RawFormat.ToString().ToLowerInvariant()}",
|
||||
Title = Resources.Strings.SaveImageAsButtonText.Format(this.mainPictureBox.Image.RawFormat.ToString().ToUpperInvariant())
|
||||
};
|
||||
|
||||
|
|
@ -154,8 +164,8 @@ namespace ParquetViewer.Controls
|
|||
bitmap.Save(saveFileDialog.FileName, this.mainPictureBox.Image.RawFormat);
|
||||
|
||||
MessageBox.Show(this,
|
||||
Resources.Strings.ImageSavedToDiskMessage.Format(saveFileDialog.FileName),
|
||||
Resources.Strings.ImageSavedToDiskTitle,
|
||||
Resources.Strings.ImageSavedToDiskMessage.Format(saveFileDialog.FileName),
|
||||
Resources.Strings.ImageSavedToDiskTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Information);
|
||||
}
|
||||
}
|
||||
|
|
@ -165,7 +175,7 @@ namespace ParquetViewer.Controls
|
|||
try
|
||||
{
|
||||
this.mainPictureBox.Cursor = Cursors.WaitCursor;
|
||||
Clipboard.SetImage(this.mainPictureBox.Image);
|
||||
Clipboard.SetImage(this.mainPictureBox.Image!);
|
||||
await Task.Delay(100); //allow cursor to change
|
||||
}
|
||||
finally
|
||||
|
|
@ -197,4 +207,4 @@ namespace ParquetViewer.Controls
|
|||
public int SourceRowIndex { get; } = sourceRowIndex;
|
||||
public int SourceColumnIndex { get; } = sourceColumnIndex;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
using ParquetViewer;
|
||||
using System;
|
||||
using System.ComponentModel;
|
||||
using System.Drawing;
|
||||
using System.Windows.Forms;
|
||||
using System.Windows.Forms.VisualStyles;
|
||||
|
|
@ -21,6 +22,7 @@ public class StylableCheckBox : CheckBox
|
|||
/// <summary>
|
||||
/// Gets or sets the foreground color of the checkbox label if a checkbox is disabled
|
||||
/// </summary>
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public Color DisabledForeColor
|
||||
{
|
||||
get;
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
using System.Drawing;
|
||||
using System.ComponentModel;
|
||||
using System.Drawing;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace ParquetViewer.Controls
|
||||
{
|
||||
public class ThemableToolStripSeperator : ToolStripSeparator
|
||||
{
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Visible)]
|
||||
public new Color BackColor { get; set; } = Color.Transparent;
|
||||
|
||||
/// <remarks>
|
||||
|
|
@ -23,4 +25,4 @@ namespace ParquetViewer.Controls
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -12,7 +12,7 @@ namespace ParquetViewer
|
|||
public partial class CustomDateFormatInputForm : FormBase
|
||||
{
|
||||
public string UserEnteredDateFormat => this.desiredDateFormatTextBox.Text;
|
||||
|
||||
|
||||
public CustomDateFormatInputForm()
|
||||
{
|
||||
InitializeComponent();
|
||||
|
|
@ -85,8 +85,8 @@ namespace ParquetViewer
|
|||
else
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.InvalidDateFormatErrorMessage,
|
||||
Resources.Errors.InvalidDateFormatErrorTitle,
|
||||
Resources.Errors.InvalidDateFormatErrorMessage,
|
||||
Resources.Errors.InvalidDateFormatErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
}
|
||||
}
|
||||
|
|
@ -104,4 +104,4 @@ namespace ParquetViewer
|
|||
this.dateFormatDocsLinkLabel.ActiveLinkColor = theme.ActiveHyperlinkColor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1759,7 +1759,7 @@
|
|||
<value>17</value>
|
||||
</data>
|
||||
<data name="instructionsTableLayoutPanel.Size" type="System.Drawing.Size, System.Drawing">
|
||||
<value>554, 254</value>
|
||||
<value>554, 236</value>
|
||||
</data>
|
||||
<data name="instructionsTableLayoutPanel.TabIndex" type="System.Int32, mscorlib">
|
||||
<value>1</value>
|
||||
|
|
@ -1894,7 +1894,7 @@
|
|||
<value>6</value>
|
||||
</data>
|
||||
<data name="mainTableLayoutPanel.Size" type="System.Drawing.Size, System.Drawing">
|
||||
<value>578, 373</value>
|
||||
<value>578, 355</value>
|
||||
</data>
|
||||
<data name="mainTableLayoutPanel.TabIndex" type="System.Int32, mscorlib">
|
||||
<value>0</value>
|
||||
|
|
@ -1939,7 +1939,10 @@
|
|||
<value>7, 15</value>
|
||||
</data>
|
||||
<data name="$this.ClientSize" type="System.Drawing.Size, System.Drawing">
|
||||
<value>578, 373</value>
|
||||
<value>578, 355</value>
|
||||
</data>
|
||||
<data name="$this.MaximumSize" type="System.Drawing.Size, System.Drawing">
|
||||
<value>900, 750</value>
|
||||
</data>
|
||||
<data name="$this.StartPosition" type="System.Windows.Forms.FormStartPosition, System.Windows.Forms">
|
||||
<value>CenterParent</value>
|
||||
|
|
|
|||
|
|
@ -6,4 +6,4 @@ namespace ParquetViewer.Exceptions
|
|||
{
|
||||
public InvalidQueryException(Exception? ex = null) : base(Resources.Errors.InvalidQueryErrorMessage, ex) { }
|
||||
}
|
||||
}
|
||||
}
|
||||
16
src/ParquetViewer/Exceptions/RowsReadException.cs
Normal file
16
src/ParquetViewer/Exceptions/RowsReadException.cs
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
using System;
|
||||
|
||||
namespace ParquetViewer.Exceptions
|
||||
{
|
||||
/// <summary>
/// Exception that carries two underlying failures at once: one exposed as
/// <see cref="ParquetNetException"/> and one as <see cref="DuckDbException"/>.
/// Both are also wrapped together in an <see cref="AggregateException"/> used as the inner exception.
/// </summary>
public class RowsReadException : Exception
{
    /// <summary>The exception passed as <c>parquetNetEx</c> at construction.</summary>
    public Exception ParquetNetException { get; }

    /// <summary>The exception passed as <c>duckDbEx</c> at construction.</summary>
    public Exception DuckDbException { get; }

    /// <param name="parquetNetEx">First underlying exception.</param>
    /// <param name="duckDbEx">Second underlying exception.</param>
    /// <param name="message">Optional message forwarded to <see cref="Exception"/>.</param>
    public RowsReadException(Exception parquetNetEx, Exception duckDbEx, string? message = null)
        : base(message, new AggregateException(parquetNetEx, duckDbEx))
    {
        ParquetNetException = parquetNetEx;
        DuckDbException = duckDbEx;
    }
}
|
||||
}
|
||||
|
|
@ -6,9 +6,9 @@ namespace ParquetViewer.Exceptions
|
|||
{
|
||||
/// <summary>
/// Exception whose message is built by formatting
/// <c>Resources.Errors.UnexpectedAssemblyVersionErrorFormat</c> with the offending version string.
/// </summary>
internal class UnsupportedAssemblyVersionException : Exception
{
    /// <param name="unsupportedAssemblyVersion">Version text inserted into the error message.</param>
    /// <param name="ex">Optional inner exception.</param>
    public UnsupportedAssemblyVersionException(string unsupportedAssemblyVersion, Exception? ex = null)
        : base(Resources.Errors.UnexpectedAssemblyVersionErrorFormat.Format(unsupportedAssemblyVersion), ex) { }

    /// <summary>
    /// Creates an instance for the given version and hands it to <c>ExceptionEvent.FireAndForget</c>.
    /// </summary>
    /// <param name="unsupportedAssemblyVersion">Version text to report.</param>
    public static void Record(string unsupportedAssemblyVersion) => ExceptionEvent.FireAndForget(new UnsupportedAssemblyVersionException(unsupportedAssemblyVersion));
}
|
||||
}
|
||||
}
|
||||
|
|
@ -3,7 +3,7 @@ using System;
|
|||
|
||||
namespace ParquetViewer.Exceptions
|
||||
{
|
||||
public class XlsCellLengthException: Exception
|
||||
public class XlsCellLengthException : Exception
|
||||
{
|
||||
public readonly FileType FileType = FileType.XLS;
|
||||
|
||||
|
|
@ -15,4 +15,4 @@ namespace ParquetViewer.Exceptions
|
|||
this.MaxLength = maxLength;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,9 +1,8 @@
|
|||
using Parquet.Schema;
|
||||
using ParquetViewer.Controls;
|
||||
using ParquetViewer.Controls;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.ComponentModel;
|
||||
using System.Drawing;
|
||||
using System.Linq;
|
||||
using System.Windows.Forms;
|
||||
|
|
@ -16,8 +15,11 @@ namespace ParquetViewer
|
|||
private const int DynamicFieldCheckboxYIncrement = 30;
|
||||
private const int MaxNumberOfFieldsWeCanRender = 5000;
|
||||
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public List<string> PreSelectedFields { get; set; }
|
||||
public List<Field> AvailableFields { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public List<string> AvailableFields { get; set; }
|
||||
[DesignerSerializationVisibility(DesignerSerializationVisibility.Hidden)]
|
||||
public List<string> NewSelectedFields { get; set; }
|
||||
|
||||
private string _selectedFieldsOnlyLabelTemplate;
|
||||
|
|
@ -25,14 +27,14 @@ namespace ParquetViewer
|
|||
public FieldsToLoadForm()
|
||||
{
|
||||
InitializeComponent();
|
||||
this.AvailableFields ??= new List<Field>();
|
||||
this.AvailableFields ??= new List<string>();
|
||||
this.PreSelectedFields ??= new List<string>();
|
||||
this.NewSelectedFields ??= new List<string>();
|
||||
this._selectedFieldsOnlyLabelTemplate = this.showSelectedFieldsRadioButton.Text;
|
||||
this.SetSelectedFieldCount();
|
||||
}
|
||||
|
||||
public FieldsToLoadForm(IEnumerable<Field> availableFields, IEnumerable<string> preSelectedFields) : this()
|
||||
public FieldsToLoadForm(IEnumerable<string> availableFields, IEnumerable<string> preSelectedFields) : this()
|
||||
{
|
||||
this.AvailableFields = availableFields?.ToList() ?? new();
|
||||
this.PreSelectedFields = preSelectedFields?.ToList() ?? new();
|
||||
|
|
@ -44,7 +46,7 @@ namespace ParquetViewer
|
|||
this.RenderFieldsCheckboxes(this.AvailableFields, this.PreSelectedFields);
|
||||
}
|
||||
|
||||
private void RenderFieldsCheckboxes(List<Field> availableFields, List<string>? preSelectedFields)
|
||||
private void RenderFieldsCheckboxes(List<string> availableFields, List<string>? preSelectedFields)
|
||||
{
|
||||
this.fieldsPanel.SuspendLayout(); //Suspending the layout while dynamically adding controls adds significant performance improvement
|
||||
this.ClearAndDisposeCheckboxes();
|
||||
|
|
@ -68,7 +70,7 @@ namespace ParquetViewer
|
|||
bool isClearingSelectAllCheckbox = false;
|
||||
|
||||
var checkboxControls = new List<CheckBox>();
|
||||
foreach (Field field in availableFields)
|
||||
foreach (string field in availableFields)
|
||||
{
|
||||
if (isFirst) //Add toggle all checkbox and some other setting changes
|
||||
{
|
||||
|
|
@ -81,12 +83,8 @@ namespace ParquetViewer
|
|||
}
|
||||
|
||||
var totalFieldCount = availableFields.Count;
|
||||
var supportedFieldCount = availableFields.Where(IsSupportedFieldType).Count();
|
||||
var unsupportedFieldCount = totalFieldCount - supportedFieldCount;
|
||||
var unsupportedFieldsText = unsupportedFieldCount > 0 ? $" - {Resources.Strings.UnsupportedFieldCountTextFormat.Format(unsupportedFieldCount)}" : string.Empty;
|
||||
|
||||
string selectAllCheckBoxText = Resources.Strings.SelectAllCheckmarkTextFormat.Format(supportedFieldCount + unsupportedFieldsText);
|
||||
string deselectAllCheckBoxText = Resources.Strings.DeselectAllCheckmarkTextFormat.Format(supportedFieldCount + unsupportedFieldsText);
|
||||
string selectAllCheckBoxText = Resources.Strings.SelectAllCheckmarkTextFormat.Format(totalFieldCount);
|
||||
string deselectAllCheckBoxText = Resources.Strings.DeselectAllCheckmarkTextFormat.Format(totalFieldCount);
|
||||
var selectAllCheckbox = new CheckboxWithTooltip(this.fieldsPanel)
|
||||
{
|
||||
Name = SelectAllCheckboxName,
|
||||
|
|
@ -127,17 +125,16 @@ namespace ParquetViewer
|
|||
locationY += DynamicFieldCheckboxYIncrement;
|
||||
}
|
||||
|
||||
bool isUnsupportedFieldType = !IsSupportedFieldType(field, out var unsupportedReason);
|
||||
var fieldCheckbox = new CheckboxWithTooltip(this.fieldsPanel)
|
||||
{
|
||||
Name = string.Concat("checkbox_", field.Name),
|
||||
Text = string.Concat(field.Name, isUnsupportedFieldType ? $" {Resources.Strings.UnsupportedFieldText}" : string.Empty),
|
||||
Tag = field.Name,
|
||||
Checked = preSelectedFields?.Contains(field.Name) == true,
|
||||
Name = string.Concat("checkbox_", field),
|
||||
Text = field,
|
||||
Tag = field,
|
||||
Checked = preSelectedFields?.Contains(field) == true,
|
||||
Location = new Point(locationX, locationY),
|
||||
DisabledForeColor = _disabledTextColor,
|
||||
AutoSize = true,
|
||||
Enabled = !isUnsupportedFieldType
|
||||
Enabled = true
|
||||
};
|
||||
fieldCheckbox.CheckedChanged += (object? checkboxSender, EventArgs checkboxEventArgs) =>
|
||||
{
|
||||
|
|
@ -178,11 +175,6 @@ namespace ParquetViewer
|
|||
};
|
||||
checkboxControls.Add(fieldCheckbox);
|
||||
|
||||
if (isUnsupportedFieldType)
|
||||
{
|
||||
fieldCheckbox.SetTooltip(unsupportedReason!);
|
||||
}
|
||||
|
||||
locationY += DynamicFieldCheckboxYIncrement;
|
||||
}
|
||||
|
||||
|
|
@ -222,67 +214,6 @@ namespace ParquetViewer
|
|||
this.fieldsPanel.Controls.Clear();
|
||||
}
|
||||
|
||||
public static bool IsSupportedFieldType(Field field)
|
||||
=> IsSupportedFieldType(field, out var _);
|
||||
|
||||
public static bool IsSupportedFieldType(Field field, [NotNullWhen(false)] out string? unsupportedReason)
|
||||
{
|
||||
if (field.SchemaType == SchemaType.Data)
|
||||
{
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (field.SchemaType == SchemaType.List && field is ListField lf)
|
||||
{
|
||||
//We don't support lists of maps
|
||||
if (lf.Item.SchemaType == SchemaType.Map)
|
||||
{
|
||||
unsupportedReason = Resources.Errors.NestedListOfTypeNotSupportedMessageFormat.Format(SchemaType.List.ToString(), lf.Item.SchemaType.ToString());
|
||||
return false;
|
||||
}
|
||||
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (field.SchemaType == SchemaType.Map && field is MapField mf)
|
||||
{
|
||||
if (mf.Key.SchemaType != SchemaType.Data)
|
||||
{
|
||||
unsupportedReason = Resources.Errors.NestedListOfTypeNotSupportedMessageFormat.Format(SchemaType.Map.ToString(), mf.Key.SchemaType.ToString());
|
||||
return false;
|
||||
}
|
||||
else if (mf.Value.SchemaType != SchemaType.Data)
|
||||
{
|
||||
unsupportedReason = Resources.Errors.NestedListOfTypeNotSupportedMessageFormat.Format(SchemaType.Map.ToString(), mf.Value.SchemaType.ToString());
|
||||
return false;
|
||||
}
|
||||
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (field.SchemaType == SchemaType.Struct && field is StructField sf)
|
||||
{
|
||||
foreach (var structField in sf.Fields)
|
||||
{
|
||||
if (!IsSupportedFieldType(structField, out unsupportedReason))
|
||||
{
|
||||
unsupportedReason = Resources.Errors.StructWithUnsupportedFieldErrorMessageFormat.Format(field.Name, structField.Name)
|
||||
+ Environment.NewLine + unsupportedReason;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
unsupportedReason = null;
|
||||
return true;
|
||||
}
|
||||
|
||||
unsupportedReason = Resources.Errors.UnknownFieldTypeErrorMessage;
|
||||
return false;
|
||||
}
|
||||
|
||||
private void allFieldsRadioButton_CheckedChanged(object sender, EventArgs e)
|
||||
{
|
||||
if (((RadioButton)sender).Checked)
|
||||
|
|
@ -317,9 +248,9 @@ namespace ParquetViewer
|
|||
AppSettings.AlwaysSelectAllFields = false;
|
||||
|
||||
this.NewSelectedFields.Clear();
|
||||
if (this.allFieldsRadioButton.Checked || (this.fieldsPanel.Controls.Find(SelectAllCheckboxName, true).FirstOrDefault() as CheckBox)?.Checked == true)
|
||||
if (this.allFieldsRadioButton.Checked)
|
||||
{
|
||||
this.NewSelectedFields.AddRange(this.AvailableFields.Where(IsSupportedFieldType).Select(f => f.Name));
|
||||
this.NewSelectedFields.AddRange(this.AvailableFields);
|
||||
}
|
||||
else if (this.PreSelectedFields.Count > 0)
|
||||
{
|
||||
|
|
@ -328,9 +259,9 @@ namespace ParquetViewer
|
|||
else
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorMessage,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorTitle,
|
||||
MessageBoxButtons.OK,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorMessage,
|
||||
Resources.Errors.SelectAtLeastOneFieldErrorTitle,
|
||||
MessageBoxButtons.OK,
|
||||
MessageBoxIcon.Warning);
|
||||
return;
|
||||
}
|
||||
|
|
@ -353,19 +284,19 @@ namespace ParquetViewer
|
|||
{
|
||||
if (!string.IsNullOrWhiteSpace(this.filterColumnsTextbox.Text))
|
||||
{
|
||||
IEnumerable<Field> filteredFields;
|
||||
IEnumerable<string> filteredFields;
|
||||
var filteredColumnsNames = this.filterColumnsTextbox.Text.Split(',').ToList();
|
||||
|
||||
if (filteredColumnsNames.Count == 1)
|
||||
{
|
||||
var filter = filteredColumnsNames[0];
|
||||
filteredFields = this.AvailableFields.Where(w => w.Name.Contains(filter, StringComparison.InvariantCultureIgnoreCase));
|
||||
filteredFields = this.AvailableFields.Where(w => w.Contains(filter, StringComparison.InvariantCultureIgnoreCase));
|
||||
}
|
||||
else
|
||||
{
|
||||
char[] charsToTrim = { '"', ' ', '\'' };
|
||||
filteredColumnsNames = filteredColumnsNames.Select(s => s.Trim(charsToTrim)).ToList();
|
||||
filteredFields = this.AvailableFields.Where(w => filteredColumnsNames.Contains(w.Name));
|
||||
filteredFields = this.AvailableFields.Where(w => filteredColumnsNames.Contains(w));
|
||||
}
|
||||
|
||||
this.RenderFieldsCheckboxes(filteredFields.ToList(), this.PreSelectedFields);
|
||||
|
|
@ -412,4 +343,4 @@ namespace ParquetViewer
|
|||
this.rememberMyChoiceCheckBox.DisabledForeColor = this._disabledTextColor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -14,6 +14,7 @@ namespace ParquetViewer.Helpers
|
|||
public static class Constants
|
||||
{
|
||||
public const string WikiURL = "https://github.com/mukunku/ParquetViewer/wiki";
|
||||
public const string DuckDBSqlSyntaxURL = "https://duckdb.org/docs/stable/sql/query_syntax/select";
|
||||
}
|
||||
|
||||
public static class User
|
||||
|
|
@ -175,4 +176,4 @@ namespace ParquetViewer.Helpers
|
|||
PARQUET,
|
||||
XLSX,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -15,6 +15,7 @@ namespace ParquetViewer.Helpers
|
|||
{ typeof(bool), "BIT {1}NULL" },
|
||||
{ typeof(char), "CHAR {1}NULL" },
|
||||
{ typeof(DateTime), "DATETIME {1}NULL" },
|
||||
{ typeof(DateOnly), "DATE {1}NULL" },
|
||||
{ typeof(double), "FLOAT {1}NULL" },
|
||||
{ typeof(uint), "INT {1}NULL" },
|
||||
{ typeof(int), "INT {1}NULL" },
|
||||
|
|
@ -27,11 +28,12 @@ namespace ParquetViewer.Helpers
|
|||
{ typeof(sbyte), "TINYINT {1}NULL" },
|
||||
{ typeof(string), "NVARCHAR({0}) {1}NULL" },
|
||||
{ typeof(TimeSpan), "INT {1}NULL" },
|
||||
{ typeof(TimeOnly), "INT {1}NULL" },
|
||||
{ typeof(byte[]), "VARBINARY {1}NULL" },
|
||||
{ typeof(ListValue), "sql_variant {1}NULL /*LIST*/" },
|
||||
{ typeof(MapValue), "sql_variant {1}NULL /*MAP*/" },
|
||||
{ typeof(StructValue), "sql_variant {1}NULL /*STRUCT*/" },
|
||||
{ typeof(ByteArrayValue), "VARBINARY({0}) {1}NULL" },
|
||||
{ typeof(IListValue), "sql_variant {1}NULL /*LIST*/" },
|
||||
{ typeof(IMapValue), "sql_variant {1}NULL /*MAP*/" },
|
||||
{ typeof(IStructValue), "sql_variant {1}NULL /*STRUCT*/" },
|
||||
{ typeof(IByteArrayValue), "VARBINARY({0}) {1}NULL" },
|
||||
};
|
||||
|
||||
public string? TablePrefix { get; set; }
|
||||
|
|
@ -82,9 +84,19 @@ namespace ParquetViewer.Helpers
|
|||
|
||||
public static string GetTypeFor(DataColumn column)
|
||||
{
|
||||
var item = TypeMap[column.DataType] as string
|
||||
?? throw new NotSupportedException($"No type mapping is provided for {column.DataType.Name}");
|
||||
bool useMaxKeyword = column.DataType == typeof(string) || column.DataType == typeof(ByteArrayValue);
|
||||
Type columnType = column.DataType;
|
||||
if (columnType.ImplementsInterface<IListValue>())
|
||||
columnType = typeof(IListValue);
|
||||
else if (columnType.ImplementsInterface<IMapValue>())
|
||||
columnType = typeof(IMapValue);
|
||||
else if (columnType.ImplementsInterface<IStructValue>())
|
||||
columnType = typeof(IStructValue);
|
||||
else if (columnType.ImplementsInterface<IByteArrayValue>())
|
||||
columnType = typeof(IByteArrayValue);
|
||||
|
||||
var item = TypeMap[columnType] as string
|
||||
?? throw new NotSupportedException(string.Format("No type mapping is provided for {0}", column.DataType.Name));
|
||||
bool useMaxKeyword = column.DataType == typeof(string) || column.DataType.ImplementsInterface<IByteArrayValue>();
|
||||
return string.Format(item, useMaxKeyword ? "MAX" : column.MaxLength.ToString(), column.AllowDBNull ? string.Empty : "NOT ");
|
||||
}
|
||||
|
||||
|
|
@ -166,4 +178,4 @@ namespace ParquetViewer.Helpers
|
|||
return stringBuilder.ToString();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -109,4 +109,4 @@ namespace ParquetViewer.Helpers
|
|||
writer.Flush();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -6,10 +6,8 @@ using System.ComponentModel;
|
|||
using System.Data;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Drawing;
|
||||
using System.Globalization;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
using System.Windows.Forms;
|
||||
|
||||
namespace ParquetViewer.Helpers
|
||||
|
|
@ -17,7 +15,11 @@ namespace ParquetViewer.Helpers
|
|||
public static class ExtensionMethods
|
||||
{
|
||||
private const string DefaultDateTimeFormat = "g";
|
||||
private const string DefaultDateOnlyFormat = "d";
|
||||
private const string DefaultTimeOnlyFormat = "T";
|
||||
public const string ISO8601DateTimeFormat = "yyyy-MM-ddTHH:mm:ss.FFFFFFF";
|
||||
public const string ISO8601DateOnlyFormat = "yyyy-MM-dd";
|
||||
public const string ISO8601TimeOnlyFormat = "HH:mm:ss.FFFFFFF";
|
||||
|
||||
/// <summary>
|
||||
/// Returns a list of all column names within a given datatable
|
||||
|
|
@ -47,6 +49,24 @@ namespace ParquetViewer.Helpers
|
|||
_ => string.Empty
|
||||
};
|
||||
|
||||
public static string GetDateOnlyFormat(this DateFormat dateFormat) => dateFormat switch
|
||||
{
|
||||
DateFormat.ISO8601 => ISO8601DateOnlyFormat,
|
||||
DateFormat.Default => DefaultDateOnlyFormat,
|
||||
DateFormat.Custom => AppSettings.CustomDateFormat is not null ?
|
||||
UtilityMethods.StripTimeComponentsFromDateTimeFormat(AppSettings.CustomDateFormat) : DefaultDateOnlyFormat,
|
||||
_ => string.Empty
|
||||
};
|
||||
|
||||
public static string GetTimeOnlyFormat(this DateFormat dateFormat) => dateFormat switch
|
||||
{
|
||||
DateFormat.ISO8601 => ISO8601TimeOnlyFormat,
|
||||
DateFormat.Default => DefaultTimeOnlyFormat,
|
||||
DateFormat.Custom => AppSettings.CustomDateFormat is not null ?
|
||||
UtilityMethods.StripDateComponentsFromDateTimeFormat(AppSettings.CustomDateFormat) : DefaultTimeOnlyFormat,
|
||||
_ => string.Empty
|
||||
};
|
||||
|
||||
public static string GetExtension(this FileType fileType)
|
||||
=> Enum.IsDefined(fileType)
|
||||
? $".{fileType.ToString().ToLowerInvariant()}"
|
||||
|
|
@ -56,7 +76,7 @@ namespace ParquetViewer.Helpers
|
|||
|
||||
public static Size RenderedSize(this PictureBox pictureBox)
|
||||
{
|
||||
var wfactor = (double)pictureBox.Image.Width / pictureBox.ClientSize.Width;
|
||||
var wfactor = (double)pictureBox.Image!.Width / pictureBox.ClientSize.Width;
|
||||
var hfactor = (double)pictureBox.Image.Height / pictureBox.ClientSize.Height;
|
||||
|
||||
var resizeFactor = Math.Max(wfactor, hfactor);
|
||||
|
|
@ -79,6 +99,14 @@ namespace ParquetViewer.Helpers
|
|||
}
|
||||
}
|
||||
|
||||
public static IEnumerable<DataGridViewColumn> AsEnumerable(this DataGridViewColumnCollection columns)
|
||||
{
|
||||
foreach (DataGridViewColumn column in columns)
|
||||
{
|
||||
yield return column;
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if the type is a "simple" type. Basically anything that isn't a class, struct or array.
|
||||
/// </summary>
|
||||
|
|
@ -86,12 +114,6 @@ namespace ParquetViewer.Helpers
|
|||
public static bool IsSimple(this Type type)
|
||||
=> TypeDescriptor.GetConverter(type).CanConvertFrom(typeof(string));
|
||||
|
||||
/// <summary>
|
||||
/// Returns true if the type is a number type.
|
||||
/// </summary>
|
||||
public static bool IsNumber(this Type type) =>
|
||||
System.Array.Exists(type.GetInterfaces(), i => i.IsGenericType && i.GetGenericTypeDefinition() == typeof(INumber<>));
|
||||
|
||||
public static T ToEnum<T>(this int value, T @default) where T : struct, Enum
|
||||
{
|
||||
if (Enum.IsDefined(typeof(T), value))
|
||||
|
|
@ -109,54 +131,6 @@ namespace ParquetViewer.Helpers
|
|||
}
|
||||
}
|
||||
|
||||
public static Array GetColumnValues(this DataTable dataTable, Type type, string columnName, int skipCount, int fetchCount)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(dataTable);
|
||||
ArgumentNullException.ThrowIfNull(type);
|
||||
ArgumentOutOfRangeException.ThrowIfLessThan(skipCount, 0);
|
||||
ArgumentOutOfRangeException.ThrowIfLessThanOrEqual(fetchCount, 0);
|
||||
|
||||
if (!dataTable.Columns.Contains(columnName))
|
||||
throw new ArgumentException($"Column `{columnName}` does not exist in the datatable");
|
||||
|
||||
var recordCountAfterSkip = dataTable.Rows.Count - skipCount;
|
||||
var recordCountToRead = fetchCount > recordCountAfterSkip ? recordCountAfterSkip : fetchCount;
|
||||
var values = Array.CreateInstance(type, recordCountToRead);
|
||||
var index = 0;
|
||||
foreach(DataRow row in dataTable.Rows)
|
||||
{
|
||||
if (skipCount-- > 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
var value = row[columnName];
|
||||
if (value == DBNull.Value)
|
||||
value = null;
|
||||
else if (value is ByteArrayValue byteArray)
|
||||
value = byteArray.Data;
|
||||
else if (value is ListValue || value is MapValue || value is StructValue)
|
||||
throw new NotSupportedException("List, Map, and Struct types are currently not supported.");
|
||||
|
||||
values.SetValue(value, index++);
|
||||
|
||||
if (--fetchCount <= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return values;
|
||||
}
|
||||
|
||||
public static Type GetNullableVersion(this Type sourceType) => sourceType == null
|
||||
? throw new ArgumentNullException(nameof(sourceType))
|
||||
: !sourceType.IsValueType
|
||||
|| (sourceType.IsGenericType
|
||||
&& sourceType.GetGenericTypeDefinition() == typeof(Nullable<>))
|
||||
? sourceType
|
||||
: typeof(Nullable<>).MakeGenericType(sourceType);
|
||||
|
||||
/// <summary>
|
||||
/// Converts a float to a string without using the scientific notation, if possible
|
||||
/// </summary>
|
||||
|
|
@ -232,8 +206,8 @@ namespace ParquetViewer.Helpers
|
|||
return enumerable;
|
||||
}
|
||||
|
||||
/// <remarks>Can't put this into ByteArrayValue itself as that assembly doesn't reference System.Drawing</remarks>
|
||||
public static bool ToImage(this ByteArrayValue byteArrayValue, out Image? image)
|
||||
/// <remarks>Can't put this into IByteArrayValue itself as that assembly doesn't reference System.Drawing</remarks>
|
||||
public static bool ToImage(this IByteArrayValue byteArrayValue, [NotNullWhen(true)] out Image? image)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(byteArrayValue);
|
||||
|
||||
|
|
@ -259,7 +233,33 @@ namespace ParquetViewer.Helpers
|
|||
catch { /*swallow*/ }
|
||||
}
|
||||
|
||||
public static string Format(this string formatString, params object?[] args)
|
||||
public static bool ImplementsInterface<T>(this Type? type)
|
||||
{
|
||||
if (type is null)
|
||||
return false;
|
||||
else
|
||||
return typeof(T).IsAssignableFrom(type);
|
||||
}
|
||||
|
||||
public static string Format(this string formatString, params object?[] args)
|
||||
=> string.Format(formatString, args);
|
||||
|
||||
/// <summary>
|
||||
/// https://huggingface.co/docs/hub/en/datasets-image#parquet-format
|
||||
/// </summary>
|
||||
/// <returns>True if this is a struct with "bytes" and "path" fields</returns>
|
||||
public static bool IsHuggingFaceFormat(this IStructValue structValue, [NotNullWhen(true)] out byte[]? data)
|
||||
{
|
||||
if (structValue.Data.ColumnNames.Count == 2
|
||||
&& structValue.Data.ColumnNames.Contains("bytes")
|
||||
&& structValue.Data.ColumnNames.Contains("path")
|
||||
&& structValue.Data.GetValue("bytes") is ByteArrayValue byteArrayValue)
|
||||
{
|
||||
data = byteArrayValue.Data;
|
||||
return true;
|
||||
}
|
||||
data = null;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
using Apache.Arrow.Ipc;
|
||||
using Parquet.Meta;
|
||||
using ParquetViewer.Engine;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Text.Json;
|
||||
|
||||
|
|
@ -25,156 +26,65 @@ namespace ParquetViewer.Helpers
|
|||
}
|
||||
}
|
||||
|
||||
public static string ThriftMetadataToJSON(Engine.ParquetEngine parquetEngine, long recordCount, int fieldCount)
|
||||
public static string ThriftMetadataToJSON(IParquetEngine parquetEngine, long recordCount, int fieldCount)
|
||||
{
|
||||
try
|
||||
{
|
||||
object ProcessSchemaTree(Engine.ParquetSchemaElement parquetSchemaElement)
|
||||
{
|
||||
return new
|
||||
{
|
||||
parquetSchemaElement.Path,
|
||||
Type = parquetSchemaElement.SchemaElement.Type.ToString(),
|
||||
parquetSchemaElement.SchemaElement.TypeLength,
|
||||
LogicalType = LogicalTypeToJSONObject(parquetSchemaElement.SchemaElement.LogicalType),
|
||||
RepetitionType = parquetSchemaElement.SchemaElement.RepetitionType.ToString(),
|
||||
ConvertedType = parquetSchemaElement.SchemaElement.ConvertedType.ToString(),
|
||||
Children = parquetSchemaElement.Children.Select(pse => ProcessSchemaTree(pse)).ToArray()
|
||||
};
|
||||
}
|
||||
|
||||
var jsonObject = new
|
||||
{
|
||||
parquetEngine.ThriftMetadata.Version,
|
||||
parquetEngine.Metadata.ParquetVersion,
|
||||
NumRows = recordCount,
|
||||
NumRowGroups = parquetEngine.ThriftMetadata.RowGroups?.Count ?? -1, //What about partitioned files?
|
||||
NumRowGroups = parquetEngine.Metadata.RowGroupCount, //We assume partitioned files all have the same row group count
|
||||
NumFields = fieldCount,
|
||||
parquetEngine.ThriftMetadata.CreatedBy,
|
||||
Schema = ProcessSchemaTree(parquetEngine.ParquetSchemaTree),
|
||||
RowGroups = (parquetEngine.ThriftMetadata.RowGroups ?? Enumerable.Empty<RowGroup>()).Select(rowGroup => new
|
||||
parquetEngine.Metadata.CreatedBy,
|
||||
Schema = new
|
||||
{
|
||||
parquetEngine.Metadata.SchemaTree.Path,
|
||||
RepetitionType = parquetEngine.Metadata.SchemaTree.RepetitionType?.ToString().ToUpper(),
|
||||
Children = ProcessChildren(parquetEngine.Metadata.SchemaTree)
|
||||
},
|
||||
RowGroups = (parquetEngine.Metadata.RowGroups ?? Enumerable.Empty<IRowGroupMetadata>()).Select(rowGroup => new
|
||||
{
|
||||
rowGroup.Ordinal,
|
||||
rowGroup.NumRows,
|
||||
SortingColumns = (rowGroup.SortingColumns ?? Enumerable.Empty<SortingColumn>()).Select(sortingColumn => new
|
||||
{
|
||||
sortingColumn.ColumnIdx,
|
||||
sortingColumn.Descending,
|
||||
sortingColumn.NullsFirst
|
||||
}).ToArray(),
|
||||
rowGroup.RowCount,
|
||||
rowGroup.SortingColumns,
|
||||
rowGroup.Columns,
|
||||
rowGroup.FileOffset,
|
||||
rowGroup.TotalByteSize,
|
||||
rowGroup.TotalCompressedSize
|
||||
}).ToArray()
|
||||
};
|
||||
|
||||
return JsonSerializer.Serialize(jsonObject, new JsonSerializerOptions { WriteIndented = true });
|
||||
IEnumerable<object> ProcessChildren(IParquetSchemaElement schemaElement)
|
||||
{
|
||||
foreach (var child in (schemaElement.Children ?? Enumerable.Empty<IParquetSchemaElement>()))
|
||||
{
|
||||
yield return new
|
||||
{
|
||||
child.Path,
|
||||
child.Type,
|
||||
child.TypeLength,
|
||||
child.LogicalType,
|
||||
RepetitionType = child.RepetitionType?.ToString().ToUpper(),
|
||||
child.ConvertedType,
|
||||
child.Scale,
|
||||
child.Precision,
|
||||
child.NumChildren,
|
||||
Children = child.Children.Select(ProcessChildren)
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
return JsonSerializer.Serialize(jsonObject, new JsonSerializerOptions
|
||||
{
|
||||
WriteIndented = true,
|
||||
DefaultIgnoreCondition = System.Text.Json.Serialization.JsonIgnoreCondition.WhenWritingNull
|
||||
});
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
return $"Something went wrong while processing the schema:{Environment.NewLine}{Environment.NewLine}{ex}";
|
||||
}
|
||||
|
||||
static object? LogicalTypeToJSONObject(LogicalType? logicalType)
|
||||
{
|
||||
if (logicalType is null)
|
||||
{
|
||||
return null;
|
||||
}
|
||||
else if (logicalType.STRING is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.STRING) };
|
||||
}
|
||||
else if (logicalType.MAP is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.MAP) };
|
||||
}
|
||||
else if (logicalType.LIST is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.LIST) };
|
||||
}
|
||||
else if (logicalType.ENUM is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.ENUM) };
|
||||
}
|
||||
else if (logicalType.DECIMAL is not null)
|
||||
{
|
||||
return new
|
||||
{
|
||||
Name = nameof(logicalType.DECIMAL),
|
||||
logicalType.DECIMAL.Scale,
|
||||
logicalType.DECIMAL.Precision
|
||||
};
|
||||
}
|
||||
else if (logicalType.DATE is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.DATE) };
|
||||
}
|
||||
else if (logicalType.TIME is not null)
|
||||
{
|
||||
return new
|
||||
{
|
||||
Name = nameof(logicalType.TIME),
|
||||
logicalType.TIME.IsAdjustedToUTC,
|
||||
Unit = TimeUnitToString(logicalType.TIME.Unit)
|
||||
};
|
||||
}
|
||||
else if (logicalType.TIMESTAMP is not null)
|
||||
{
|
||||
return new
|
||||
{
|
||||
Name = nameof(logicalType.TIMESTAMP),
|
||||
logicalType.TIMESTAMP.IsAdjustedToUTC,
|
||||
Unit = TimeUnitToString(logicalType.TIMESTAMP.Unit)
|
||||
};
|
||||
}
|
||||
else if (logicalType.INTEGER is not null)
|
||||
{
|
||||
return new
|
||||
{
|
||||
Name = nameof(logicalType.INTEGER),
|
||||
logicalType.INTEGER.BitWidth,
|
||||
logicalType.INTEGER.IsSigned
|
||||
};
|
||||
}
|
||||
else if (logicalType.JSON is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.JSON) };
|
||||
}
|
||||
else if (logicalType.BSON is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.BSON) };
|
||||
}
|
||||
else if (logicalType.UUID is not null)
|
||||
{
|
||||
return new { Name = nameof(logicalType.UUID) };
|
||||
}
|
||||
else if (logicalType.UNKNOWN is not null)
|
||||
{
|
||||
return new { Name = $"{logicalType.UNKNOWN.GetType().Name}" };
|
||||
}
|
||||
else
|
||||
{
|
||||
return new { Name = nameof(logicalType.UNKNOWN) };
|
||||
}
|
||||
}
|
||||
|
||||
static string TimeUnitToString(TimeUnit? timeUnit)
|
||||
{
|
||||
var timeUnitString = string.Empty;
|
||||
if (timeUnit?.MILLIS is not null)
|
||||
{
|
||||
timeUnitString = nameof(timeUnit.MILLIS);
|
||||
}
|
||||
else if (timeUnit?.MICROS is not null)
|
||||
{
|
||||
timeUnitString = nameof(timeUnit.MICROS);
|
||||
}
|
||||
else if (timeUnit?.NANOS is not null)
|
||||
{
|
||||
timeUnitString = nameof(timeUnit.NANOS);
|
||||
}
|
||||
return timeUnitString;
|
||||
}
|
||||
}
|
||||
|
||||
public static string TryFormatJSON(string possibleJSON)
|
||||
|
|
@ -191,4 +101,4 @@ namespace ParquetViewer.Helpers
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -130,4 +130,4 @@ namespace ParquetViewer.Helpers
|
|||
public override string ToString()
|
||||
=> $"{this.Major}.{this.Minor}.{this.Patch}.{this.Build}";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -19,6 +19,8 @@ namespace ParquetViewer.Helpers
|
|||
public Color SelectionBackColor { get; }
|
||||
public Color FormBackgroundColor { get; }
|
||||
public Color DisabledTextColor { get; }
|
||||
public Color FrozenColumnHeaderColor { get; }
|
||||
public Color FrozenCellBackgroundColor { get; }
|
||||
|
||||
private readonly Func<Theme, ToolStripProfessionalRenderer>? _toolStripRendererProvider = null;
|
||||
public bool HasToolStripRendererProvider => _toolStripRendererProvider is not null;
|
||||
|
|
@ -41,13 +43,14 @@ namespace ParquetViewer.Helpers
|
|||
Color formBackgroundColor,
|
||||
Func<Theme, ToolStripProfessionalRenderer>? toolStripRendererProvider,
|
||||
Color activeHyperlinkColor,
|
||||
Color disabledTextColor)
|
||||
Color disabledTextColor,
|
||||
Color frozenColumnHeaderColor,
|
||||
Color frozenCellBackgroundColor)
|
||||
{
|
||||
this.CellBackgroundColor = cellBackgroundColor;
|
||||
this.TextColor = textColor;
|
||||
this.AlternateRowsCellBackgroundColor = alternateRowsCellBackgroundColor;
|
||||
this.ColumnHeaderColor = columnHeaderColor;
|
||||
this.ColumnHeaderColor = columnHeaderColor;
|
||||
this.RowHeaderColor = rowHeaderColor;
|
||||
this.RowHeaderBorderStyle = rowHeaderBorderStyle;
|
||||
this.GridBackgroundColor = gridBackgroundColor;
|
||||
|
|
@ -59,6 +62,8 @@ namespace ParquetViewer.Helpers
|
|||
this._toolStripRendererProvider = toolStripRendererProvider;
|
||||
this.ActiveHyperlinkColor = activeHyperlinkColor;
|
||||
this.DisabledTextColor = disabledTextColor;
|
||||
this.FrozenColumnHeaderColor = frozenColumnHeaderColor;
|
||||
this.FrozenCellBackgroundColor = frozenCellBackgroundColor;
|
||||
}
|
||||
|
||||
public static Theme DarkModeTheme => new(
|
||||
|
|
@ -76,7 +81,9 @@ namespace ParquetViewer.Helpers
|
|||
Color.FromArgb(44, 44, 44),
|
||||
(theme) => { return new DarkModeToolStripRenderer(theme); },
|
||||
Color.LightGray,
|
||||
Color.DarkGray
|
||||
Color.DarkGray,
|
||||
Color.FromArgb(33, 37, 63),
|
||||
Color.FromArgb(40, 44, 48)
|
||||
);
|
||||
|
||||
public static Theme LightModeTheme => new(
|
||||
|
|
@ -94,7 +101,9 @@ namespace ParquetViewer.Helpers
|
|||
SystemColors.Control,
|
||||
null,
|
||||
Color.Red,
|
||||
Color.DarkGray
|
||||
Color.DarkGray,
|
||||
SystemColors.InactiveCaption,
|
||||
SystemColors.InactiveBorder
|
||||
);
|
||||
|
||||
public bool Equals(Theme other) => this.GetHashCode() == other.GetHashCode(); //Not perfect but good enough
|
||||
|
|
@ -169,5 +178,11 @@ namespace ParquetViewer.Helpers
|
|||
public override Color MenuItemPressedGradientMiddle => this._theme.SelectionBackColor;
|
||||
public override Color MenuItemPressedGradientEnd => this._theme.SelectionBackColor;
|
||||
#endregion
|
||||
|
||||
#region Dropdown hover colors
|
||||
public override Color ButtonSelectedGradientBegin => this._theme.SelectionBackColor;
|
||||
public override Color ButtonSelectedGradientMiddle => this._theme.SelectionBackColor;
|
||||
public override Color ButtonSelectedGradientEnd => this._theme.SelectionBackColor;
|
||||
#endregion
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -98,5 +98,39 @@ namespace ParquetViewer.Helpers
|
|||
//Terminate this instance
|
||||
Application.Exit();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Best effort attempt at stripping time components from a datetime format string.
|
||||
/// </summary>
|
||||
/// <param name="dateFormat">Date format with potential time components</param>
|
||||
/// <returns>Date format with no time components</returns>
|
||||
public static string StripTimeComponentsFromDateTimeFormat(string dateFormat)
|
||||
{
|
||||
var timeComponents = new string[] { "H", "h", "m", "s", "f", "F", "t", "z", "K" };
|
||||
foreach (var component in timeComponents)
|
||||
{
|
||||
dateFormat = dateFormat.Replace(component, string.Empty);
|
||||
}
|
||||
dateFormat = dateFormat.Replace(" ", " ");
|
||||
dateFormat = dateFormat.TrimEnd('/', '-', '.', ' ', ',', '_');
|
||||
return dateFormat.Trim();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Best effort attempt at stripping date components from a datetime format string.
|
||||
/// </summary>
|
||||
/// <param name="dateFormat">Date format with potential date components</param>
|
||||
/// <returns>Time format with no date components</returns>
|
||||
public static string StripDateComponentsFromDateTimeFormat(string dateFormat)
|
||||
{
|
||||
var dateComponents = new string[] { "y", "M", "d", "g" };
|
||||
foreach (var component in dateComponents)
|
||||
{
|
||||
dateFormat = dateFormat.Replace(component, string.Empty);
|
||||
}
|
||||
dateFormat = dateFormat.Replace(" ", " ");
|
||||
dateFormat = dateFormat.TrimStart('/', '-', '.', ' ', ',', '_');
|
||||
return dateFormat.Trim();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -148,4 +148,4 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
2
src/ParquetViewer/MainForm.Designer.cs
generated
2
src/ParquetViewer/MainForm.Designer.cs
generated
|
|
@ -186,9 +186,11 @@ namespace ParquetViewer
|
|||
dataGridViewCellStyle1.WrapMode = DataGridViewTriState.True;
|
||||
mainGridView.ColumnHeadersDefaultCellStyle = dataGridViewCellStyle1;
|
||||
mainGridView.ColumnHeadersHeightSizeMode = DataGridViewColumnHeadersHeightSizeMode.DisableResizing;
|
||||
mainGridView.ColumnNameEscapeFormat = "[{0}]";
|
||||
mainTableLayoutPanel.SetColumnSpan(mainGridView, 11);
|
||||
mainGridView.CopyAsWhereIcon = (System.Drawing.Image)resources.GetObject("mainGridView.CopyAsWhereIcon");
|
||||
mainGridView.CopyToClipboardIcon = (System.Drawing.Image)resources.GetObject("mainGridView.CopyToClipboardIcon");
|
||||
mainGridView.DateValueEscapeFormat = "#{0}#";
|
||||
mainGridView.EnableHeadersVisualStyles = false;
|
||||
mainGridView.Name = "mainGridView";
|
||||
mainGridView.ReadOnly = true;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ using ParquetViewer.Engine.Types;
|
|||
using ParquetViewer.Exceptions;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
using System.Data;
|
||||
using System.Diagnostics;
|
||||
using System.Drawing;
|
||||
using System.Globalization;
|
||||
|
|
@ -45,7 +46,7 @@ namespace ParquetViewer
|
|||
private void recordsToTextBox_TextChanged(object sender, EventArgs? e)
|
||||
{
|
||||
var textbox = (TextBox)sender;
|
||||
if (int.TryParse(textbox.Text, out var recordCount))
|
||||
if (int.TryParse(textbox.Text, out var recordCount) && recordCount > 0)
|
||||
this.CurrentMaxRowCount = recordCount;
|
||||
else
|
||||
textbox.Text = this.CurrentMaxRowCount.ToString();
|
||||
|
|
@ -173,8 +174,8 @@ namespace ParquetViewer
|
|||
|
||||
//Treat list, map, and struct types as strings by casting them automatically
|
||||
foreach (var complexField in this.mainGridView.Columns.OfType<DataGridViewColumn>()
|
||||
.Where(c => c.ValueType == typeof(ListValue) || c.ValueType == typeof(MapValue)
|
||||
|| c.ValueType == typeof(StructValue) || c.ValueType == typeof(ByteArrayValue))
|
||||
.Where(c => c.ValueType.ImplementsInterface<IListValue>() || c.ValueType.ImplementsInterface<IMapValue>()
|
||||
|| c.ValueType.ImplementsInterface<IStructValue>() || c.ValueType.ImplementsInterface<IByteArrayValue>())
|
||||
.Select(c => c.Name))
|
||||
{
|
||||
//This isn't perfect but it should handle most cases
|
||||
|
|
@ -288,4 +289,4 @@ namespace ParquetViewer
|
|||
UtilityMethods.RestartApplication();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
using MiniExcelLibs;
|
||||
using ParquetViewer.Analytics;
|
||||
using ParquetViewer.Engine;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Engine.Types;
|
||||
using ParquetViewer.Exceptions;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
|
|
@ -55,7 +55,8 @@ namespace ParquetViewer
|
|||
this.exportFileDialog.Filter = "CSV file (*.csv)|*.csv|JSON file (*.json)|*.json|Excel '93 file (*.xls)|*.xls|Excel '07 file (*.xlsx)|*.xlsx";
|
||||
this.exportFileDialog.FilterIndex = (int)defaultFileType + 1;
|
||||
|
||||
if (this._openParquetEngine?.ParquetSchemaTree?.Children.All(s => s.FieldType == Engine.ParquetSchemaElement.FieldTypeId.Primitive) == true)
|
||||
if (this._openParquetEngine?.Metadata.SchemaTree?.Children.All(s => s.IsPrimitive) == true
|
||||
&& this._openParquetEngine is Engine.ParquetNET.ParquetEngine)
|
||||
{
|
||||
this.exportFileDialog.Filter += "|Parquet file (*.parquet)|*.parquet";
|
||||
}
|
||||
|
|
@ -67,55 +68,13 @@ namespace ParquetViewer
|
|||
|
||||
var fileExtension = Path.GetExtension(filePath);
|
||||
FileType? selectedFileType = UtilityMethods.ExtensionToFileType(fileExtension);
|
||||
if (selectedFileType is null)
|
||||
throw new ArgumentOutOfRangeException(fileExtension);
|
||||
|
||||
var stopWatch = Stopwatch.StartNew();
|
||||
loadingIcon = this.ShowLoadingIcon(Resources.Strings.ExportingDataLabelText, this.MainDataSource.DefaultView.Count * this.MainDataSource.Columns.Count);
|
||||
if (selectedFileType == FileType.CSV)
|
||||
{
|
||||
await WriteDataToCSVFile(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.XLS)
|
||||
{
|
||||
const int MAX_XLS_COLUMN_COUNT = 256; //.xls format has a hard limit on 256 columns
|
||||
if (this.MainDataSource!.Columns.Count > MAX_XLS_COLUMN_COUNT)
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.TooManyColumnsXlsErrorMessageFormat.Format(MAX_XLS_COLUMN_COUNT, this.MainDataSource.Columns.Count),
|
||||
Resources.Errors.TooManyColumnsErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
await WriteDataToExcel93File(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.XLSX)
|
||||
{
|
||||
const int MAX_XLSX_COLUMN_COUNT = 16384; //.xlsx format has a hard limit on 16384 columns
|
||||
if (this.MainDataSource!.Columns.Count > MAX_XLSX_COLUMN_COUNT)
|
||||
{
|
||||
MessageBox.Show(this,
|
||||
Resources.Errors.TooManyColumnsXlsxErrorMessageFormat.Format(MAX_XLSX_COLUMN_COUNT, this.MainDataSource.Columns.Count),
|
||||
Resources.Errors.TooManyColumnsErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
await WriteDataToExcel2007File(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.JSON)
|
||||
{
|
||||
await WriteDataToJSONFile(this.MainDataSource, filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else if (selectedFileType == FileType.PARQUET)
|
||||
{
|
||||
await this.WriteDataToParquetFile(filePath, loadingIcon.CancellationToken, loadingIcon);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception(string.Format(Resources.Errors.UnsupportedExportTypeFormat, fileExtension));
|
||||
}
|
||||
await ExportResultsImpl(this.MainDataSource!, selectedFileType.Value, this._openParquetEngine,
|
||||
filePath, loadingIcon.CancellationToken, loadingIcon, this.OpenFileOrFolderPath);
|
||||
|
||||
if (loadingIcon.CancellationToken.IsCancellationRequested)
|
||||
{
|
||||
|
|
@ -127,15 +86,15 @@ namespace ParquetViewer
|
|||
long fileSizeInBytes = new FileInfo(filePath).Length;
|
||||
|
||||
FileExportEvent.FireAndForget(
|
||||
selectedFileType.Value,
|
||||
selectedFileType.Value,
|
||||
fileSizeInBytes,
|
||||
this.mainGridView.RowCount,
|
||||
this.mainGridView.ColumnCount,
|
||||
this.mainGridView.RowCount,
|
||||
this.mainGridView.ColumnCount,
|
||||
stopWatch.ElapsedMilliseconds);
|
||||
|
||||
MessageBox.Show(this,
|
||||
Resources.Strings.ExportSuccessfulMessageFormat.Format(Math.Round((fileSizeInBytes / 1024.0) / 1024.0, 2)),
|
||||
Resources.Strings.ExportSuccessfulTitle,
|
||||
Resources.Strings.ExportSuccessfulTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Information);
|
||||
}
|
||||
}
|
||||
|
|
@ -149,10 +108,10 @@ namespace ParquetViewer
|
|||
catch (XlsCellLengthException ex)
|
||||
{
|
||||
CleanupFile(filePath);
|
||||
|
||||
|
||||
if (MessageBox.Show(this,
|
||||
Resources.Strings.SwitchFromXlsToXlsxMessageFormat.Format(ex.MaxLength, ex.FileType.GetExtension(), FileType.XLSX.GetExtension()),
|
||||
Resources.Strings.SwitchFromXlsToXlsxMessageTitle,
|
||||
Resources.Strings.SwitchFromXlsToXlsxMessageTitle,
|
||||
MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation) == DialogResult.OK)
|
||||
{
|
||||
rerunType = FileType.XLSX;
|
||||
|
|
@ -184,10 +143,64 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
|
||||
private async Task WriteDataToExcel2007File(DataTable mainDataSource, string path, CancellationToken cancellationToken, IProgress<int> progress)
|
||||
|
||||
private static Task ExportResultsImpl(DataTable dataTable, FileType selectedFileType, IParquetEngine? engine,
|
||||
string filePath, CancellationToken cancellationToken, IProgress<int> progress, string? sourceFileOrFolderPath)
|
||||
{
|
||||
if (selectedFileType == FileType.CSV)
|
||||
{
|
||||
return WriteDataToCSVFile(dataTable, filePath, cancellationToken, progress);
|
||||
}
|
||||
else if (selectedFileType == FileType.XLS)
|
||||
{
|
||||
const int MAX_XLS_COLUMN_COUNT = 256; //.xls format has a hard limit on 256 columns
|
||||
if (dataTable.Columns.Count > MAX_XLS_COLUMN_COUNT)
|
||||
{
|
||||
MessageBox.Show(
|
||||
Resources.Errors.TooManyColumnsXlsErrorMessageFormat.Format(MAX_XLS_COLUMN_COUNT, dataTable.Columns.Count),
|
||||
Resources.Errors.TooManyColumnsErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
return WriteDataToExcel93File(dataTable, filePath, cancellationToken, progress);
|
||||
}
|
||||
else if (selectedFileType == FileType.XLSX)
|
||||
{
|
||||
const int MAX_XLSX_COLUMN_COUNT = 16384; //.xlsx format has a hard limit on 16384 columns
|
||||
if (dataTable.Columns.Count > MAX_XLSX_COLUMN_COUNT)
|
||||
{
|
||||
MessageBox.Show(
|
||||
Resources.Errors.TooManyColumnsXlsxErrorMessageFormat.Format(MAX_XLSX_COLUMN_COUNT, dataTable.Columns.Count),
|
||||
Resources.Errors.TooManyColumnsErrorTitle,
|
||||
MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
|
||||
return Task.CompletedTask;
|
||||
}
|
||||
|
||||
var sheetName = Path.GetFileNameWithoutExtension(sourceFileOrFolderPath) ?? "Sheet1";
|
||||
return WriteDataToExcel2007File(dataTable, filePath, sheetName, cancellationToken, progress);
|
||||
}
|
||||
else if (selectedFileType == FileType.JSON)
|
||||
{
|
||||
return WriteDataToJSONFile(dataTable, filePath, cancellationToken, progress);
|
||||
}
|
||||
else if (selectedFileType == FileType.PARQUET)
|
||||
{
|
||||
ArgumentNullException.ThrowIfNull(engine);
|
||||
var engineTypeName = engine is Engine.ParquetNET.ParquetEngine ? "ParquetNET" : "DuckDB";
|
||||
return WriteDataToParquetFile(engine, dataTable, filePath, cancellationToken, progress, engineTypeName);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new Exception(string.Format(Resources.Errors.UnsupportedExportTypeFormat, selectedFileType.ToString()));
|
||||
}
|
||||
}
|
||||
|
||||
private static async Task WriteDataToExcel2007File(DataTable mainDataSource, string path, string sheetName, CancellationToken cancellationToken, IProgress<int> progress)
|
||||
{
|
||||
const int MAX_XLSX_SHEET_NAME_LENGTH = 31;
|
||||
var sheetName = Path.GetFileNameWithoutExtension(this.OpenFileOrFolderPath) ?? "Sheet1";
|
||||
|
||||
//sanitize sheet name
|
||||
sheetName = Regex.Replace(sheetName, "[^a-zA-Z0-9 _\\-()]", string.Empty).Left(MAX_XLSX_SHEET_NAME_LENGTH);
|
||||
|
|
@ -223,6 +236,8 @@ namespace ParquetViewer
|
|||
writer.WriteLine(rowBuilder.ToString());
|
||||
|
||||
string dateFormat = AppSettings.DateTimeDisplayFormat.GetDateFormat();
|
||||
string dateOnlyFormat = AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat();
|
||||
string timeOnlyFormat = AppSettings.DateTimeDisplayFormat.GetTimeOnlyFormat();
|
||||
foreach (DataRowView row in dataTable.DefaultView)
|
||||
{
|
||||
rowBuilder.Clear();
|
||||
|
|
@ -248,6 +263,14 @@ namespace ParquetViewer
|
|||
{
|
||||
rowBuilder.Append(UtilityMethods.CleanCSVValue(dt.ToString(dateFormat)));
|
||||
}
|
||||
else if (value is DateOnly dateOnly)
|
||||
{
|
||||
rowBuilder.Append(UtilityMethods.CleanCSVValue(dateOnly.ToString(dateOnlyFormat)));
|
||||
}
|
||||
else if (value is TimeOnly timeOnly)
|
||||
{
|
||||
rowBuilder.Append(UtilityMethods.CleanCSVValue(timeOnly.ToString(timeOnlyFormat)));
|
||||
}
|
||||
else
|
||||
{
|
||||
var stringValue = value!.ToString()!; //we never have `null` only `DBNull.Value`
|
||||
|
|
@ -265,6 +288,8 @@ namespace ParquetViewer
|
|||
=> Task.Run(() =>
|
||||
{
|
||||
string dateFormat = AppSettings.DateTimeDisplayFormat.GetDateFormat();
|
||||
string dateOnlyFormat = AppSettings.DateTimeDisplayFormat.GetDateOnlyFormat();
|
||||
string timeOnlyFormat = AppSettings.DateTimeDisplayFormat.GetTimeOnlyFormat();
|
||||
using var fs = new FileStream(path, FileMode.OpenOrCreate);
|
||||
var excelWriter = new ExcelWriter(fs);
|
||||
excelWriter.BeginWrite();
|
||||
|
|
@ -302,6 +327,14 @@ namespace ParquetViewer
|
|||
{
|
||||
excelWriter.WriteCell(i + 1, j, dt.ToString(dateFormat));
|
||||
}
|
||||
else if (value is DateOnly dateOnly)
|
||||
{
|
||||
excelWriter.WriteCell(i + 1, j, dateOnly.ToString(dateOnlyFormat));
|
||||
}
|
||||
else if (value is TimeOnly timeOnly)
|
||||
{
|
||||
excelWriter.WriteCell(i + 1, j, timeOnly.ToString(timeOnlyFormat));
|
||||
}
|
||||
else
|
||||
{
|
||||
var stringValue = value.ToString();
|
||||
|
|
@ -353,7 +386,7 @@ namespace ParquetViewer
|
|||
jsonWriter.WritePropertyName(columnName);
|
||||
|
||||
object? value = row.Row.ItemArray[i];
|
||||
StructValue.WriteValue(jsonWriter, value!, false);
|
||||
Engine.Helpers.WriteValue(jsonWriter, value!, false);
|
||||
progress.Report(1);
|
||||
}
|
||||
jsonWriter.WriteEndObject();
|
||||
|
|
@ -361,22 +394,11 @@ namespace ParquetViewer
|
|||
jsonWriter.WriteEndArray();
|
||||
}, cancellationToken);
|
||||
|
||||
private Task WriteDataToParquetFile(string path, CancellationToken cancellationToken, IProgress<int> progress)
|
||||
private static Task WriteDataToParquetFile(IParquetEngine engine, DataTable dataTable, string path,
|
||||
CancellationToken cancellationToken, IProgress<int> progress, string engineName)
|
||||
=> Task.Run(async () =>
|
||||
{
|
||||
var fields = new List<Parquet.Schema.Field>(this.MainDataSource!.Columns.Count);
|
||||
foreach (DataColumn column in this.MainDataSource.Columns)
|
||||
{
|
||||
fields.Add(this._openParquetEngine!.Schema!.Fields
|
||||
.Where(field => field.Name.Equals(column.ColumnName, StringComparison.InvariantCulture))
|
||||
.First());
|
||||
}
|
||||
var parquetSchema = new Parquet.Schema.ParquetSchema(fields);
|
||||
|
||||
using var fs = new FileStream(path, FileMode.OpenOrCreate);
|
||||
using var parquetWriter = await Parquet.ParquetWriter.CreateAsync(parquetSchema, fs, cancellationToken: cancellationToken);
|
||||
parquetWriter.CompressionLevel = System.IO.Compression.CompressionLevel.Optimal;
|
||||
parquetWriter.CustomMetadata = new Dictionary<string, string>
|
||||
var customMetadata = new Dictionary<string, string>
|
||||
{
|
||||
{
|
||||
"ParquetViewer", @"
|
||||
|
|
@ -384,38 +406,12 @@ namespace ParquetViewer
|
|||
""CreatedWith"": ""ParquetViewer"",
|
||||
""Version"": """ + Env.AssemblyVersion.ToString() + @""",
|
||||
""Website"": ""https://github.com/mukunku/ParquetViewer"",
|
||||
""CreationDate"": """ + DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ssZ") + @"""
|
||||
""CreationDate"": """ + DateTime.UtcNow.ToString("yyyy-MM-ddTHH:mm:ssZ") + @""",
|
||||
""Engine"": """ + engineName + @"""
|
||||
}"
|
||||
}
|
||||
};
|
||||
|
||||
const int MAX_ROWS_PER_ROWGROUP = 100_000; //Without batching we sometimes get "OverflowException: Array dimensions exceeded supported range" from Parquet.NET
|
||||
var batchIndex = 0;
|
||||
var isLastBatch = false;
|
||||
while (!isLastBatch)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
using var rowGroup = parquetWriter.CreateRowGroup();
|
||||
foreach (var dataField in parquetSchema.DataFields)
|
||||
{
|
||||
if (cancellationToken.IsCancellationRequested)
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
var type = dataField.IsNullable ? dataField.ClrType.GetNullableVersion() : dataField.ClrType;
|
||||
var values = this.MainDataSource.GetColumnValues(type, dataField.Name, batchIndex * MAX_ROWS_PER_ROWGROUP, MAX_ROWS_PER_ROWGROUP);
|
||||
var dataColumn = new Parquet.Data.DataColumn(dataField, values);
|
||||
await rowGroup.WriteColumnAsync(dataColumn, cancellationToken);
|
||||
progress.Report(values.Length); //No way to report progress for each row, so do it by column
|
||||
isLastBatch = values.Length < MAX_ROWS_PER_ROWGROUP;
|
||||
}
|
||||
batchIndex++;
|
||||
}
|
||||
await engine.WriteDataToParquetFileAsync(dataTable, path, cancellationToken, progress, customMetadata);
|
||||
}, cancellationToken);
|
||||
|
||||
private static void HandleAllFilesSkippedException(AllFilesSkippedException ex)
|
||||
|
|
@ -464,10 +460,16 @@ namespace ParquetViewer
|
|||
sb.AppendLine(Resources.Errors.MultipleSchemasDetectedEntriesErrorMessageFormat.Format(schemaIndex++));
|
||||
for (var i = 0; i < topCount; i++)
|
||||
{
|
||||
if (i == schema.Fields.Count)
|
||||
if (i == schema.Count)
|
||||
break;
|
||||
|
||||
sb.AppendLine($" {schema.Fields.ElementAt(i).Name}");
|
||||
sb.AppendLine($" {schema.ElementAt(i)}");
|
||||
}
|
||||
|
||||
if (schemaIndex > maxSchemasLimit)
|
||||
{
|
||||
sb.AppendLine("...");
|
||||
break;
|
||||
}
|
||||
|
||||
if (schemaIndex > maxSchemasLimit)
|
||||
|
|
@ -484,17 +486,19 @@ namespace ParquetViewer
|
|||
ShowError(Resources.Errors.MalformedFieldErrorMessageFormat.Format(ex.Message));
|
||||
}
|
||||
|
||||
private static void HandleDecimalOverflowException(DecimalOverflowException ex)
|
||||
private static void HandleDecimalOverflowException(DecimalOverflowException ex)
|
||||
=> ShowError(
|
||||
Resources.Errors.DecimalValueTooLargeErrorMessageFormat.Format(
|
||||
ex.FieldName,
|
||||
ex.Precision,
|
||||
ex.Scale,
|
||||
DecimalOverflowException.MAX_DECIMAL_PRECISION,
|
||||
DecimalOverflowException.MAX_DECIMAL_SCALE),
|
||||
Resources.Errors.DecimalValueTooLargeErrorTitle);
|
||||
(ex.HasDetailedInfo ? Resources.Errors.DecimalValueTooLargeErrorMessageFormat
|
||||
: Resources.Errors.DecimalValueUnknownSizeTooLargeErrorMessageFormat)
|
||||
.Format(
|
||||
ex.FieldName,
|
||||
ex.Precision,
|
||||
ex.Scale,
|
||||
DecimalOverflowException.MAX_DECIMAL_PRECISION,
|
||||
DecimalOverflowException.MAX_DECIMAL_SCALE),
|
||||
Resources.Errors.DecimalValueTooLargeErrorTitle);
|
||||
|
||||
private static void ShowError(string message, string? title = null)
|
||||
private static void ShowError(string message, string? title = null)
|
||||
=> MessageBox.Show(message, title ?? Resources.Errors.GenericErrorMessage, MessageBoxButtons.OK, MessageBoxIcon.Error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -41,4 +41,4 @@ namespace ParquetViewer
|
|||
this.mainMenuStrip.Renderer = theme.ToolStripRenderer;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -231,4 +231,4 @@ namespace ParquetViewer
|
|||
RefreshExperimentalFeatureToolStrips();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
using ParquetViewer.Analytics;
|
||||
using ParquetViewer.Controls;
|
||||
using ParquetViewer.Engine;
|
||||
using ParquetViewer.Engine.Exceptions;
|
||||
using ParquetViewer.Helpers;
|
||||
using System;
|
||||
|
|
@ -138,7 +139,7 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
|
||||
private Engine.ParquetEngine? _openParquetEngine = null;
|
||||
private IParquetEngine? _openParquetEngine = null;
|
||||
#endregion
|
||||
|
||||
public MainForm()
|
||||
|
|
@ -199,13 +200,13 @@ namespace ParquetViewer
|
|||
{
|
||||
try
|
||||
{
|
||||
this._openParquetEngine = await Engine.ParquetEngine.OpenFileOrFolderAsync(this.OpenFileOrFolderPath, default);
|
||||
this._openParquetEngine = await Engine.ParquetNET.ParquetEngine.OpenFileOrFolderAsync(this.OpenFileOrFolderPath, default);
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
if (this._openParquetEngine == null)
|
||||
{
|
||||
//cancel file open
|
||||
//cancel the file open
|
||||
this.OpenFileOrFolderPath = null;
|
||||
}
|
||||
|
||||
|
|
@ -217,9 +218,9 @@ namespace ParquetViewer
|
|||
{
|
||||
HandleSomeFilesSkippedException(sfse);
|
||||
}
|
||||
else if (ex is FileReadException fre)
|
||||
else if (ex is Engine.Exceptions.FileReadException fre)
|
||||
{
|
||||
HandleFileReadException(fre);
|
||||
MainForm.HandleFileReadException(fre);
|
||||
}
|
||||
else if (ex is MultipleSchemasFoundException msfe)
|
||||
{
|
||||
|
|
@ -238,10 +239,10 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
|
||||
Parquet.Schema.ParquetSchema? schema = null;
|
||||
List<string>? fields = null;
|
||||
try
|
||||
{
|
||||
schema = this._openParquetEngine.Schema;
|
||||
fields = this._openParquetEngine.Fields;
|
||||
}
|
||||
catch (ArgumentException ex) when (ex.Message.StartsWith("at least one field is required"))
|
||||
{ /*swallow: This exception is thrown from Parquet.Net when the schema has no fields*/ }
|
||||
|
|
@ -250,12 +251,11 @@ namespace ParquetViewer
|
|||
throw new Parquet.ParquetException(Resources.Errors.ParquetSchemaReadErrorMessage, ex);
|
||||
}
|
||||
|
||||
var fields = schema?.Fields;
|
||||
if (fields?.Count > 0)
|
||||
{
|
||||
if (AppSettings.AlwaysSelectAllFields && !forceOpenDialog)
|
||||
{
|
||||
return fields.Where(FieldsToLoadForm.IsSupportedFieldType).Select(f => f.Name).ToList();
|
||||
return fields;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
@ -278,6 +278,46 @@ namespace ParquetViewer
|
|||
}
|
||||
|
||||
private async void LoadFileToGridview()
|
||||
{
|
||||
if (this._openParquetEngine is null)
|
||||
return;
|
||||
|
||||
#if RELEASE_SELFCONTAINED
|
||||
//Self contained release has both Parquet.NET and DuckDB engines included as the file size remains the same.
|
||||
try
|
||||
{
|
||||
await this.LoadFileToGridviewImpl(this._openParquetEngine);
|
||||
}
|
||||
catch (Exception unhandledEx)
|
||||
{
|
||||
//Try DuckDB if Parquet.NET fails
|
||||
if (this._openParquetEngine is Engine.DuckDB.ParquetEngine)
|
||||
throw;
|
||||
|
||||
try
|
||||
{
|
||||
var duckDbEngine = await Engine.DuckDB.ParquetEngine.OpenFileOrFolderAsync(this.OpenFileOrFolderPath!, default);
|
||||
await LoadFileToGridviewImpl(duckDbEngine);
|
||||
SwapEngines(duckDbEngine);
|
||||
}
|
||||
catch (Exception duckDbEx)
|
||||
{
|
||||
//If DuckDB fails too, bail
|
||||
throw new Exceptions.RowsReadException(unhandledEx, duckDbEx);
|
||||
}
|
||||
}
|
||||
|
||||
void SwapEngines(IParquetEngine newEngine)
|
||||
{
|
||||
this._openParquetEngine.DisposeSafely();
|
||||
this._openParquetEngine = newEngine;
|
||||
}
|
||||
#else
|
||||
await this.LoadFileToGridviewImpl(this._openParquetEngine);
|
||||
#endif
|
||||
}
|
||||
|
||||
private async Task LoadFileToGridviewImpl(IParquetEngine engine)
|
||||
{
|
||||
var stopwatch = Stopwatch.StartNew(); var loadTime = TimeSpan.Zero; var indexTime = TimeSpan.Zero;
|
||||
LoadingIcon? loadingIcon = null;
|
||||
|
|
@ -296,12 +336,12 @@ namespace ParquetViewer
|
|||
return;
|
||||
}
|
||||
|
||||
long cellCount = this.SelectedFields.Count * Math.Min(this.CurrentMaxRowCount, this._openParquetEngine!.RecordCount - this.CurrentOffset);
|
||||
long cellCount = this.SelectedFields.Count * Math.Min(this.CurrentMaxRowCount, engine.RecordCount - this.CurrentOffset);
|
||||
loadingIcon = this.ShowLoadingIcon(Resources.Strings.LoadingDataLabelText, cellCount);
|
||||
|
||||
var intermediateResult = await Task.Run(async () =>
|
||||
{
|
||||
return await this._openParquetEngine.ReadRowsAsync(this.SelectedFields, this.CurrentOffset, this.CurrentMaxRowCount, loadingIcon.CancellationToken, loadingIcon);
|
||||
return await engine.ReadRowsAsync(this.SelectedFields, this.CurrentOffset, this.CurrentMaxRowCount, loadingIcon.CancellationToken, loadingIcon);
|
||||
}, loadingIcon.CancellationToken);
|
||||
|
||||
loadTime = stopwatch.Elapsed;
|
||||
|
|
@ -318,7 +358,7 @@ namespace ParquetViewer
|
|||
indexTime = stopwatch.Elapsed - loadTime;
|
||||
|
||||
this.recordCountStatusBarLabel.Text = string.Format(Resources.Strings.LoadedRecordCountRangeFormat, this.CurrentOffset, this.CurrentOffset + finalResult.Rows.Count);
|
||||
this.totalRowCountStatusBarLabel.Text = finalResult.ExtendedProperties[Engine.ParquetEngine.TotalRecordCountExtendedPropertyKey]!.ToString();
|
||||
this.totalRowCountStatusBarLabel.Text = engine.RecordCount.ToString();
|
||||
this.actualShownRecordCountLabel.Text = finalResult.Rows.Count.ToString();
|
||||
|
||||
this.MainDataSource = finalResult;
|
||||
|
|
@ -364,18 +404,23 @@ namespace ParquetViewer
|
|||
this.showingStatusBarLabel.ToolTipText = $"Total time: {totalTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$" Load time: {loadTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$" Index time: {indexTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$" Render time: {renderTime:mm\\:ss\\.ff}" + Environment.NewLine;
|
||||
$" Render time: {renderTime:mm\\:ss\\.ff}" + Environment.NewLine +
|
||||
$"Engine: {(engine is Engine.ParquetNET.ParquetEngine ? "ParquetNET" : "DuckDB")}";
|
||||
|
||||
loadingIcon?.Dispose();
|
||||
|
||||
if (wasSuccessful)
|
||||
{
|
||||
var engineType = this._openParquetEngine is Engine.ParquetNET.ParquetEngine
|
||||
? FileOpenEvent.ParquetEngineTypeId.ParquetNET
|
||||
: FileOpenEvent.ParquetEngineTypeId.DuckDB;
|
||||
|
||||
FileOpenEvent.FireAndForget(
|
||||
Directory.Exists(this.OpenFileOrFolderPath),
|
||||
this._openParquetEngine!.NumberOfPartitions,
|
||||
this._openParquetEngine.RecordCount,
|
||||
this._openParquetEngine.ThriftMetadata.RowGroups.Count,
|
||||
this._openParquetEngine.Fields.Count,
|
||||
engine.NumberOfPartitions,
|
||||
engine.RecordCount,
|
||||
engine.Metadata.RowGroups.Count,
|
||||
engine.Fields.Count,
|
||||
this.MainDataSource!.Columns.Cast<DataColumn>().Select(column => column.DataType.Name).Distinct().Order().ToArray(),
|
||||
this.CurrentOffset,
|
||||
this.CurrentMaxRowCount,
|
||||
|
|
@ -383,7 +428,8 @@ namespace ParquetViewer
|
|||
(long)totalTime.TotalMilliseconds,
|
||||
(long)loadTime.TotalMilliseconds,
|
||||
(long)indexTime.TotalMilliseconds,
|
||||
(long)renderTime.TotalMilliseconds);
|
||||
(long)renderTime.TotalMilliseconds,
|
||||
engineType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -493,4 +539,4 @@ namespace ParquetViewer
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue