Module: Legion::Data::Extract

Defined in:
lib/legion/data/extract.rb,
lib/legion/data/extract/handlers/csv.rb,
lib/legion/data/extract/handlers/pdf.rb,
lib/legion/data/extract/handlers/base.rb,
lib/legion/data/extract/handlers/docx.rb,
lib/legion/data/extract/handlers/html.rb,
lib/legion/data/extract/handlers/json.rb,
lib/legion/data/extract/handlers/pptx.rb,
lib/legion/data/extract/handlers/text.rb,
lib/legion/data/extract/handlers/xlsx.rb,
lib/legion/data/extract/type_detector.rb,
lib/legion/data/extract/handlers/jsonl.rb,
lib/legion/data/extract/handlers/markdown.rb

Defined Under Namespace

Modules: Handlers, TypeDetector

Class Method Summary

Class Method Details

.can_extract?(type) ⇒ Boolean

Returns:

  • (Boolean)


37
38
39
40
41
# File 'lib/legion/data/extract.rb', line 37

# Reports whether a handler is registered for +type+ and declares itself available.
#
# @param type [Symbol, String, nil] type identifier; converted with +to_sym+ unless nil
# @return [Boolean] the truthy result of the handler's +available?+, otherwise false
#   (including when no handler is found for the type)
def can_extract?(type)
  load_all_handlers

  type_key = type.nil? ? nil : type.to_sym
  candidate = Handlers::Base.for_type(type_key)
  return false if candidate.nil?

  candidate.available? || false
end

.extract(source, type: :auto) ⇒ Object



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/legion/data/extract.rb', line 10

# Extracts text from +source+ using the handler registered for its type.
#
# NOTE(review): unlike can_extract? and supported_types, this method does not call
# load_all_handlers before the lookup — confirm Handlers::Base.for_type can find
# handlers on its own.
#
# @param source [Object] handed unchanged to TypeDetector.detect (when the type is
#   auto-detected) and to the handler's +extract+
# @param type [Symbol, String, nil] explicit type, or :auto / "auto" to detect it
#   from +source+
# @return [Hash] always has :success and :text. On success it also has :metadata and
#   :type. On failure it has :error, plus :type for every failure except
#   :unknown_type, and :gem when the handler reports itself unavailable. When an
#   exception is rescued, :error is the exception message (a String).
def extract(source, type: :auto)
  # Normalize before comparing so the string "auto" triggers detection just like
  # :auto; every other value was already being converted with to_sym.
  requested_type = type&.to_sym
  detected_type = requested_type == :auto ? TypeDetector.detect(source) : requested_type
  return { success: false, text: nil, error: :unknown_type } unless detected_type

  handler = Handlers::Base.for_type(detected_type)
  return { success: false, text: nil, error: :no_handler, type: detected_type } unless handler

  unless handler.available?
    return { success: false, text: nil, error: :gem_not_installed,
             gem: handler.gem_name, type: detected_type }
  end

  result = handler.extract(source)
  if result[:text]
    { success: true, text: result[:text], metadata: result[:metadata], type: detected_type }
  else
    { success: false, text: nil, error: result[:error], type: detected_type }
  end
rescue StandardError => e
  # detected_type is nil here if the failure happened before it was assigned.
  { success: false, text: nil, error: e.message, type: detected_type }
end

.register_handler(type, klass) ⇒ Object



43
44
45
# File 'lib/legion/data/extract.rb', line 43

# Stores +klass+ in Handlers::Base.registry under the symbolized +type+.
#
# @param type [#to_sym] type identifier used as the registry key
# @param klass [Object] handler to associate with the type
# @return [Object] +klass+, as the value of the assignment
def register_handler(type, klass)
  handler_registry = Handlers::Base.registry
  type_key = type.to_sym
  handler_registry[type_key] = klass
end

.supported_types ⇒ Object



32
33
34
35
# File 'lib/legion/data/extract.rb', line 32

# Lists the types reported by the handler base after all handlers are loaded.
#
# @return [Object] whatever Handlers::Base.supported_types returns
def supported_types
  load_all_handlers

  handler_base = Handlers::Base
  handler_base.supported_types
end