Module: Legion::Data::Extract
- Defined in:
- lib/legion/data/extract.rb,
lib/legion/data/extract/handlers/csv.rb,
lib/legion/data/extract/handlers/pdf.rb,
lib/legion/data/extract/handlers/vtt.rb,
lib/legion/data/extract/handlers/base.rb,
lib/legion/data/extract/handlers/docx.rb,
lib/legion/data/extract/handlers/html.rb,
lib/legion/data/extract/handlers/json.rb,
lib/legion/data/extract/handlers/pptx.rb,
lib/legion/data/extract/handlers/text.rb,
lib/legion/data/extract/handlers/xlsx.rb,
lib/legion/data/extract/type_detector.rb,
lib/legion/data/extract/handlers/jsonl.rb,
lib/legion/data/extract/handlers/markdown.rb
Defined Under Namespace
Modules: Handlers, TypeDetector
Class Method Summary
- .can_extract?(type) ⇒ Boolean
- .extract(source, type: :auto) ⇒ Object
- .register_handler(type, klass) ⇒ Object
- .supported_types ⇒ Object
Class Method Details
.can_extract?(type) ⇒ Boolean
37 38 39 40 41 |
# File 'lib/legion/data/extract.rb', line 37

# Reports whether a handler is registered for +type+ and says it is available.
#
# @param type [#to_sym, nil] type identifier; +nil+ is passed through to the
#   registry lookup unchanged
# @return [Boolean] +false+ when no handler is found or the handler's
#   +available?+ is falsy
def can_extract?(type)
  load_all_handlers
  candidate = Handlers::Base.for_type(type&.to_sym)
  return false if candidate.nil?

  # Coerce a nil/false answer from the handler to a strict false.
  candidate.available? || false
end
.extract(source, type: :auto) ⇒ Object
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/legion/data/extract.rb', line 10

# Extracts text from +source+ with the handler registered for its type.
#
# Failures are reported in the returned hash rather than raised: any
# StandardError raised along the way is rescued and its message is returned
# under +:error+.
#
# NOTE(review): unlike can_extract? and supported_types, this method does not
# call load_all_handlers first — confirm Handlers::Base.for_type loads handlers
# on its own.
#
# @param source [Object] input handed to TypeDetector.detect (when +type+ is
#   +:auto+) and to the handler's +extract+
# @param type [Symbol, #to_sym, nil] explicit type; +:auto+ (the default)
#   detects the type from +source+
# @return [Hash] on success
#   +{ success: true, text:, metadata:, type: }+; on failure
#   +{ success: false, text: nil, error: }+, with +:type+ on every path except
#   +:unknown_type+ and +:gem+ added for +:gem_not_installed+
def extract(source, type: :auto)
  detected_type = type == :auto ? TypeDetector.detect(source) : type&.to_sym
  return { success: false, text: nil, error: :unknown_type } unless detected_type

  handler = Handlers::Base.for_type(detected_type)
  return { success: false, text: nil, error: :no_handler, type: detected_type } unless handler

  unless handler.available?
    return { success: false, text: nil, error: :gem_not_installed,
             gem: handler.gem_name, type: detected_type }
  end

  result = handler.extract(source)
  if result[:text]
    { success: true, text: result[:text], metadata: result[:metadata], type: detected_type }
  else
    { success: false, text: nil, error: result[:error], type: detected_type }
  end
rescue StandardError => e
  # detected_type is still nil here when the failure happened during detection.
  { success: false, text: nil, error: e.message, type: detected_type }
end
.register_handler(type, klass) ⇒ Object
43 44 45 |
# File 'lib/legion/data/extract.rb', line 43

# Stores +klass+ in the Handlers::Base registry under the symbolized +type+,
# replacing whatever was stored under that key before.
#
# @param type [#to_sym] type identifier used as the registry key
# @param klass [Object] handler to associate with +type+
# @return [Object] +klass+, the value of the assignment
def register_handler(type, klass)
  handler_registry = Handlers::Base.registry
  handler_registry[type.to_sym] = klass
end
.supported_types ⇒ Object
32 33 34 35 |
# File 'lib/legion/data/extract.rb', line 32

# Calls load_all_handlers, then returns whatever Handlers::Base reports as
# its supported types.
#
# @return [Object] the result of Handlers::Base.supported_types
def supported_types
  load_all_handlers
  handler_base = Handlers::Base
  handler_base.supported_types
end