/android/import-contacts : revision 65

To get this branch, use:

bzr branch
http://bzr.ed.am/android/import-contacts

« back to all changes in this revision

Viewing changes to src/am/ed/importcontacts/VcardImporter.java

Committer: edam
Date: 2012-12-20 16:49:39 UTC
Revision ID: tim@ed.am-20121220164939-j9mg98v0uofws7kw

added support for notes; rewrote backends so that all normalising of data is now done within the contacts cache; made the vCard unescape routine slightly more acceptant of non-standard escaped characters

files removed:
res/drawable/family_pic.png

files modified:
AndroidManifest.xml

NEWS

TODO

gen/am/ed/importcontacts/R.java

res/layout/configure_vcf.xml

res/layout/doit.xml

res/layout/intro.xml

res/layout/merge.xml

res/layout/mergeprompt.xml

res/values/strings.xml

src/am/ed/importcontacts/ContactsBackend.java

src/am/ed/importcontacts/ContactsCache.java

src/am/ed/importcontacts/ContactsContractBackend.java

src/am/ed/importcontacts/VcardImporter.java

Show diffs side-by-side

added added

removed removed

src/am/ed/importcontacts/VcardImporter.java

133

boolean in_vcard = false;

134

while( ( line = reader.readLine() ) != null )

135

{

136

if( !in_vcard )

137

{

136

if( !in_vcard ) {

138

137

// look for vcard beginning

139

if( line.matches( "^BEGIN[ \t]*:[ \t]*VCARD.*$" ) ) {

138

if( line.matches( "^BEGIN:VCARD" ) ) {

140

139

in_vcard = true;

141

140

_vcard_count++;

142

141

}

143

// check for vMsg files

144

else if( line.matches( "^BEGIN[ \t]*:[ \t]*VMSG.*$" ) ) {

145

showError( getText( R.string.error_vcf_vmsgfile )

146

+ file.getName() );

147

}

148

142

}

149

else if( line.matches( "^END[ \t]*:[ \t]*VCARD.*$" ) )

143

else if( line.matches( "^END:VCARD" ) )

150

144

in_vcard = false;

151

145

}

152

146

181

175

// import

182

176

importVCardFileContent( content, file.getName() );

183

177

}

184

catch( OutOfMemoryError e ) {

185

showError( R.string.error_outofmemory );

186

}

187

178

catch( FileNotFoundException e ) {

188

179

showError( getText( R.string.error_filenotfound ) +

189

180

file.getName() );

202

193

ContentLineIterator cli = new ContentLineIterator( content );

203

194

while( cli.hasNext() )

204

195

{

205

ContentLine content_line = cli.next();

196

ByteBuffer buffer = cli.next();

206

197

207

// get a US-ASCII version of the string, for processing

208

String line = content_line.getUsAsciiLine();

198

// get a US-ASCII version of the line for processing

199

String line;

200

try {

201

line = new String( buffer.array(), buffer.position(),

202

buffer.limit() - buffer.position(), "US-ASCII" );

203

}

204

catch( UnsupportedEncodingException e ) {

205

// we know US-ASCII is supported, so appease the compiler...

206

line = "";

207

}

209

208

210

209

if( vcard == null ) {

211

210

// look for vcard beginning

212

if( line.matches( "^BEGIN[ \t]*:[ \t]*VCARD.*$" ) ) {

211

if( line.matches( "^BEGIN:VCARD" ) ) {

213

212

setProgress( _progress++ );

214

213

vcard = new Vcard();

215

214

vcard_start_line = cli.getLineNumber();

217

216

}

218

217

else {

219

218

// look for vcard content or ending

220

if( line.matches( "^END[ \t]*:[ \t]*VCARD.*$" ) )

219

if( line.matches( "^END:VCARD" ) )

221

220

{

222

221

// finalise the vcard/contact

223

222

try {

260

259

{

261

260

// try giving the line to the vcard

262

261

try {

263

vcard.parseLine( content_line );

262

vcard.parseLine( buffer, line,

263

cli.doesNextLineLookFolded() );

264

}

265

catch( Vcard.ParseException e ) {

266

skipContact();

289

}

290

}

291

292

class ContentLine

293

{

294

private ByteBuffer _buffer;

295

private boolean _folded_next;

296

private String _line;

297

298

public ContentLine( ByteBuffer buffer, boolean folded_next )

299

{

300

_buffer = buffer;

301

_folded_next = folded_next;

302

_line = null;

303

}

304

305

public ByteBuffer getBuffer()

306

{

307

return _buffer;

308

}

309

310

public boolean doesNextLineLookFolded()

311

{

312

return _folded_next;

313

}

314

315

public String getUsAsciiLine()

316

{

317

// generate the line and cache it

318

if( _line == null ) {

319

try {

320

_line = new String( _buffer.array(), _buffer.position(),

321

_buffer.limit() - _buffer.position(), "US-ASCII" );

322

}

323

catch( UnsupportedEncodingException e ) {

324

// we know US-ASCII *is* supported, so appease the

325

// compiler...

326

}

327

}

328

329

// return cached line

330

return _line;

331

}

332

}

333

334

class ContentLineIterator implements Iterator< ContentLine >

292

class ContentLineIterator implements Iterator< ByteBuffer >

335

293

{

336

294

protected byte[] _content = null;

337

295

protected int _pos = 0;

349

307

}

350

308

351

309

@Override

352

public ContentLine next()

310

public ByteBuffer next()

353

311

{

354

312

int initial_pos = _pos;

355

313

362

320

_pos > initial_pos )? _pos - 1 : _pos;

363

321

_pos++;

364

322

_line++;

365

return new ContentLine(

366

ByteBuffer.wrap( _content, initial_pos,

367

to - initial_pos ),

368

doesNextLineLookFolded() );

323

return ByteBuffer.wrap( _content, initial_pos,

324

to - initial_pos );

369

325

}

370

326

371

327

// we didn't find one, but were there bytes left?

373

329

int to = _pos;

374

330

_pos++;

375

331

_line++;

376

return new ContentLine(

377

ByteBuffer.wrap( _content, initial_pos,

378

to - initial_pos ),

379

doesNextLineLookFolded() );

332

return ByteBuffer.wrap( _content, initial_pos,

333

to - initial_pos );

380

334

}

381

335

382

336

// no bytes left

394

348

* onto the end of this one?

395

349

* @return

396

350

397

private boolean doesNextLineLookFolded()

351

public boolean doesNextLineLookFolded()

398

352

{

399

353

return _pos > 0 && _pos < _content.length &&

400

_content[ _pos - 1 ] == '\n' &&

401

( _content[ _pos ] == ' ' || _content[ _pos ] == '\t' );

354

_content[ _pos - 1 ] == '\n' && _content[ _pos ] == ' ';

402

355

}

403

356

404

357

public int getLineNumber()

416

369

private final static int MULTILINE_NONE = 0;

417

370

private final static int MULTILINE_ENCODED = 1; // v2.1 quoted-printable

418

371

private final static int MULTILINE_ESCAPED = 2; // v2.1 \\CRLF

419

private final static int MULTILINE_FOLDED = 3; // MIME-DIR folding

372

private final static int MULTILINE_FOLDED = 3; // v3.0 folding

420

373

421

374

private String _version = null;

422

private Vector< ContentLine > _content_lines = null;

375

private Vector< ByteBuffer > _buffers = null;

423

376

private int _name_level = NAMELEVEL_NONE;

424

377

private int _parser_multiline_state = MULTILINE_NONE;

425

378

private String _parser_current_name_and_params = null;

468

421

@SuppressWarnings("serial")

469

422

protected class SkipImportException extends Exception { }

470

423

471

private String extractCollonPartFromLine( ContentLine content_line,

472

boolean former )

424

private String extractCollonPartFromLine( ByteBuffer buffer,

425

String line, boolean former )

473

426

{

474

427

String ret = null;

475

428

429

// get a US-ASCII version of the line for processing, unless we were

430

// supplied with one

431

if( line == null ) {

432

try {

433

line = new String( buffer.array(), buffer.position(),

434

buffer.limit() - buffer.position(), "US-ASCII" );

435

}

436

catch( UnsupportedEncodingException e ) {

437

// we know US-ASCII is supported, so appease the compiler...

438

line = "";

439

}

440

}

441

476

442

// split line into name and value parts and check to make sure we

477

443

// only got 2 parts and that the first part is not zero in length

478

String[] parts = content_line.getUsAsciiLine().split( ":", 2 );

444

String[] parts = line.split( ":", 2 );

479

445

if( parts.length == 2 && parts[ 0 ].length() > 0 )

480

446

ret = parts[ former? 0 : 1 ];

481

447

482

448

return ret;

483

449

}

484

450

485

private String extractNameAndParamsFromLine( ContentLine content_line )

486

{

487

return extractCollonPartFromLine( content_line, true ).trim();

488

}

489

490

private String extractValueFromLine( ContentLine content_line )

491

{

492

return extractCollonPartFromLine( content_line, false );

493

}

494

495

public void parseLine( ContentLine content_line )

451

private String extractNameAndParamsFromLine( ByteBuffer buffer,

452

String line )

453

{

454

return extractCollonPartFromLine( buffer, line, true );

455

}

456

457

private String extractValueFromLine( ByteBuffer buffer, String line )

458

{

459

return extractCollonPartFromLine( buffer, line, false );

460

}

461

462

public void parseLine( ByteBuffer buffer, String line,

463

boolean next_line_looks_folded )

496

464

throws ParseException, SkipImportException,

497

465

AbortImportException

498

466

{

501

469

{

502

470

// tentatively get name and params from line

503

471

String name_and_params =

504

extractNameAndParamsFromLine( content_line );

472

extractNameAndParamsFromLine( buffer, line );

505

473

506

474

// is it a version line?

507

475

if( name_and_params != null &&

508

name_and_params.equalsIgnoreCase( "VERSION" ) )

476

name_and_params.equals( "VERSION" ) )

509

477

{

510

478

// yes, get it!

511

String value = extractValueFromLine( content_line ).trim();

479

String value = extractValueFromLine( buffer, line );

512

480

if( !value.equals( "2.1" ) && !value.equals( "3.0" ) )

513

481

throw new ParseException( R.string.error_vcf_version );

514

482

_version = value;

515

483

516

484

// parse any buffers we've been accumulating while we waited

517

485

// for a version

518

if( _content_lines != null )

519

for( int i = 0; i < _content_lines.size(); i++ )

520

parseLine( _content_lines.get( i ) );

521

_content_lines = null;

486

if( _buffers != null )

487

for( int i = 0; i < _buffers.size(); i++ )

488

parseLine( _buffers.get( i ), null,

489

i + 1 < _buffers.size() &&

490

_buffers.get( i + 1 ).hasRemaining() &&

491

_buffers.get( i + 1 ).get(

492

_buffers.get( i + 1 ).position() ) == ' ' );

493

_buffers = null;

522

494

}

523

495

else

524

496

{

525

497

// no, so stash this line till we get a version

526

if( _content_lines == null )

527

_content_lines = new Vector< ContentLine >();

528

_content_lines.add( content_line );

498

if( _buffers == null )

499

_buffers = new Vector< ByteBuffer >();

500

_buffers.add( buffer );

529

501

}

530

502

}

531

503

else

532

504

{

533

505

// name and params and the position in the buffer where the

534

// "value" part of the line starts

506

// "value" part of the line starts

535

507

String name_and_params;

536

508

int pos;

537

509

543

515

544

516

// skip some initial line characters, depending on the type

545

517

// of multi-line we're handling

546

pos = content_line.getBuffer().position();

518

pos = buffer.position();

547

519

switch( _parser_multiline_state )

548

520

{

549

521

case MULTILINE_FOLDED:

550

522

pos++;

551

523

break;

552

524

case MULTILINE_ENCODED:

553

while( pos < content_line.getBuffer().limit() && (

554

content_line.getBuffer().get( pos ) == ' ' ||

555

content_line.getBuffer().get( pos ) == '\t' ) )

525

while( pos < buffer.limit() && (

526

buffer.get( pos ) == ' ' ||

527

buffer.get( pos ) == '\t' ) )

556

528

{

557

529

pos++;

558

530

}

567

539

}

568

540

else

569

541

{

570

// skip empty lines

571

if( content_line.getUsAsciiLine().trim().length() == 0 )

572

return;

573

574

542

// get name and params from line, and since we're not

575

543

// parsing a subsequent line in a multi-line, this should

576

544

// not fail, or it's an error

577

545

name_and_params =

578

extractNameAndParamsFromLine( content_line );

546

extractNameAndParamsFromLine( buffer, line );

579

547

if( name_and_params == null )

580

548

throw new ParseException(

581

549

R.string.error_vcf_malformed );

582

550

583

551

// calculate how many chars to skip from beginning of line

584

552

// so we skip the property "name:" part

585

pos = content_line.getBuffer().position() +

586

name_and_params.length() + 1;

553

pos = buffer.position() + name_and_params.length() + 1;

587

554

588

555

// reset the saved multi-line state

589

556

_parser_current_name_and_params = name_and_params;

592

559

593

560

// get value from buffer, as raw bytes

594

561

ByteBuffer value;

595

value = ByteBuffer.wrap( content_line.getBuffer().array(), pos,

596

content_line.getBuffer().limit() - pos );

562

value = ByteBuffer.wrap( buffer.array(), pos,

563

buffer.limit() - pos );

597

564

598

565

// get parameter parts

599

566

String[] name_param_parts = name_and_params.split( ";", -1 );

606

573

"FN", "ORG", "TITLE", "TEL", "EMAIL", "ADR", "LABEL" }

607

574

) );

608

575

boolean is_interesting_field =

609

interesting_fields.contains(

610

name_param_parts[ 0 ].toUpperCase( Locale.US ) );

576

interesting_fields.contains( name_param_parts[ 0 ] );

611

577

612

578

// parse encoding parameter

613

579

String encoding = checkParam( name_param_parts, "ENCODING" );

614

580

if( encoding != null )

615

581

encoding = encoding.toUpperCase( Locale.US );

616

582

if( is_interesting_field && encoding != null &&

617

!encoding.equalsIgnoreCase( "8BIT" ) &&

618

!encoding.equalsIgnoreCase( "QUOTED-PRINTABLE" ) )

619

//&& !encoding.equalsIgnoreCase( "BASE64" ) )

583

!encoding.equals( "8BIT" ) &&

584

!encoding.equals( "QUOTED-PRINTABLE" ) )

585

//&& !encoding.equals( "BASE64" ) )

620

586

{

621

587

throw new ParseException( R.string.error_vcf_encoding );

622

588

}

626

592

if( charset != null )

627

593

charset = charset.toUpperCase( Locale.US );

628

594

if( charset != null &&

629

!charset.equalsIgnoreCase( "US-ASCII" ) &&

630

!charset.equalsIgnoreCase( "ASCII" ) &&

631

!charset.equalsIgnoreCase( "UTF-8" ) )

595

!charset.equals( "US-ASCII" ) &&

596

!charset.equals( "ASCII" ) &&

597

!charset.equals( "UTF-8" ) )

632

598

{

633

599

throw new ParseException( R.string.error_vcf_charset );

634

600

}

636

602

// do unencoding (or default to a fake unencoding result with

637

603

// the raw string)

638

604

UnencodeResult unencoding_result = null;

639

if( encoding != null &&

640

encoding.equalsIgnoreCase( "QUOTED-PRINTABLE" ) )

641

{

605

if( encoding != null && encoding.equals( "QUOTED-PRINTABLE" ) )

642

606

unencoding_result = unencodeQuotedPrintable( value );

643

}

644

// else if( encoding != null &&

645

// encoding.equalsIgnoreCase( "BASE64" ) )

646

// {

607

// else if( encoding != null && encoding.equals( "BASE64" ) )

647

608

// unencoding_result = unencodeBase64( props[ 1 ], charset );

648

// }

649

609

if( unencoding_result != null ) {

650

610

value = unencoding_result.getBuffer();

651

611

if( unencoding_result.isAnotherLineRequired() )

656

616

// specified for a v2.1 vcard entry, we assume it's US-ASCII)

657

617

if( ( charset == null && _version.equals( "2.1" ) ) ||

658

618

( charset != null && (

659

charset.equalsIgnoreCase( "ASCII" ) ||

660

charset.equalsIgnoreCase( "US-ASCII" ) ) ) )

619

charset.equals( "ASCII" ) ||

620

charset.equals( "US-ASCII" ) ) ) )

661

621

{

662

622

value = transcodeAsciiToUtf8( value );

663

623

}

674

634

// for some entries that have semicolon-separated value parts,

675

635

// check to see if the value ends in an escape character, which

676

636

// indicates that we have a multi-line value

677

if( ( name_param_parts[ 0 ].equalsIgnoreCase( "N" ) ||

678

name_param_parts[ 0 ].equalsIgnoreCase( "ORG" ) ||

679

name_param_parts[ 0 ].equalsIgnoreCase( "ADR" ) ) &&

637

if( ( name_param_parts[ 0 ].equals( "N" ) ||

638

name_param_parts[ 0 ].equals( "ORG" ) ||

639

name_param_parts[ 0 ].equals( "ADR" ) ) &&

680

640

doesStringEndInAnEscapeChar( string_value ) )

681

641

{

682

642

_parser_multiline_state = MULTILINE_ESCAPED;

684

644

string_value.length() - 1 );

685

645

}

686

646

687

// if we know we're not in an encoding-based multi-line, check

688

// to see if we're in a folded multi-line

647

// now we know whether we're in an encoding multi-line,

648

// determine if we're in a v3 folded multi-line or not

689

649

if( _parser_multiline_state == MULTILINE_NONE &&

690

content_line.doesNextLineLookFolded() )

650

_version.equals( "3.0" ) && next_line_looks_folded )

691

651

{

692

652

_parser_multiline_state = MULTILINE_FOLDED;

693

653

}

705

665

if( complete_value.length() < 1 ) return;

706

666

707

667

// parse some properties

708

if( name_param_parts[ 0 ].equalsIgnoreCase( "N" ) )

668

if( name_param_parts[ 0 ].equals( "N" ) )

709

669

parseN( name_param_parts, complete_value );

710

else if( name_param_parts[ 0 ].equalsIgnoreCase( "FN" ) )

670

else if( name_param_parts[ 0 ].equals( "FN" ) )

711

671

parseFN( name_param_parts, complete_value );

712

else if( name_param_parts[ 0 ].equalsIgnoreCase( "ORG" ) )

672

else if( name_param_parts[ 0 ].equals( "ORG" ) )

713

673

parseORG( name_param_parts, complete_value );

714

else if( name_param_parts[ 0 ].equalsIgnoreCase( "TITLE" ) )

674

else if( name_param_parts[ 0 ].equals( "TITLE" ) )

715

675

parseTITLE( name_param_parts, complete_value );

716

else if( name_param_parts[ 0 ].equalsIgnoreCase( "TEL" ) )

676

else if( name_param_parts[ 0 ].equals( "TEL" ) )

717

677

parseTEL( name_param_parts, complete_value );

718

else if( name_param_parts[ 0 ].equalsIgnoreCase( "EMAIL" ) )

678

else if( name_param_parts[ 0 ].equals( "EMAIL" ) )

719

679

parseEMAIL( name_param_parts, complete_value );

720

else if( name_param_parts[ 0 ].equalsIgnoreCase( "ADR" ) )

680

else if( name_param_parts[ 0 ].equals( "ADR" ) )

721

681

parseADR( name_param_parts, complete_value );

722

else if( name_param_parts[ 0 ].equalsIgnoreCase( "LABEL" ) )

682

else if( name_param_parts[ 0 ].equals( "LABEL" ) )

723

683

parseLABEL( name_param_parts, complete_value );

724

else if( name_param_parts[ 0 ].equalsIgnoreCase( "NOTE" ) )

684

else if( name_param_parts[ 0 ].equals( "NOTE" ) )

725

685

parseNOTE( name_param_parts, complete_value );

726

686

}

727

687

}

849

809

for( int b = 0; b < name_part_parts.length; b++ )

850

810

if( name_part_parts[ b ].length() > 0 )

851

811

{

852

if( value.length() > 0 ) value += " ";

812

if( value.length() == 0 ) value += " ";

853

813

value += name_part_parts[ b ];

854

814

}

855

815

}

974

934

for( int a = 0; a < adr_parts.length; a++ )

975

935

if( adr_parts[ a ].length() > 0 )

976

936

{

977

// version 3.0 vCards allow further splitting by comma

978

if( _version.equals( "3.0" ) )

979

{

980

// split this part into its comma-separated bits and

981

// add them on individual lines

982

String[] adr_part_parts =

983

splitValueByCharacter( adr_parts[ a ], ',' );

984

for( int b = 0; b < adr_part_parts.length; b++ )

985

if( adr_part_parts[ b ].length() > 0 )

986

{

987

if( value.length() > 0 ) value += "\n";

988

value += adr_part_parts[ b ];

989

}

990

}

991

else

992

{

993

// add this part on an individual line

994

if( value.length() > 0 ) value += "\n";

995

value += adr_parts[ a ];

996

}

937

// split this part into its comma-separated bits

938

String[] adr_part_parts =

939

splitValueByCharacter( adr_parts[ a ], ',' );

940

for( int b = 0; b < adr_part_parts.length; b++ )

941

if( adr_part_parts[ b ].length() > 0 )

942

{

943

if( value.length() > 0 ) value += "\n";

944

value += adr_part_parts[ b ];

945

}

997

946

}

998

947

999

948

Set< String > types = extractTypes( params, Arrays.asList(

1033

982

throws ParseException, ContactNotIdentifiableException

1034

983

{

1035

984

// missing version (and data is present)

1036

if( _version == null && _content_lines != null )

985

if( _version == null && _buffers != null )

1037

986

throw new ParseException( R.string.error_vcf_malformed );

1038

987

1039

988

// finalise the parent class

1064

1013

HashSet< String > ret = new HashSet< String >();

1065

1014

1066

1015

Pattern p = Pattern.compile(

1067

"^" + name + "[ \\t]*=[ \\t]*(\"?)(.*)\\1$",

1068

Pattern.CASE_INSENSITIVE );

1016

"^" + name + "[ \\t]*=[ \\t]*(\"?)(.*)\\1$" );

1069

1017

for( int i = 0; i < params.length; i++ ) {

1070

1018

Matcher m = p.matcher( params[ i ] );

1071

1019

if( m.matches() )

1079

1027

* Amongst the params, return any type values present. For v2.1 vCards,

1080

1028

* those types are just parameters. For v3.0, they are prefixed with

1081

1029

* "TYPE=". There may also be multiple type parameters.

1082

* @param params an array of params to look for types in

1083

* @param valid_types a list of upper-case type values to look for

1030

* @param params

1031

* @param valid_types a list of type values to look for

1084

1032

* @return a set of present type values

1085

1033

1086

1034

private Set< String > extractTypes( String[] params,

1092

1040

String type_params[] = checkParams( params, "TYPE" );

1093

1041

for( int a = 0; a < type_params.length; a++ )

1094

1042

{

1095

// check for a comma-separated list of types (why? I don't think

1096

// this is in the specs!)

1043

// check for a comma-separated list of types (why? this isn't in

1044

// the specs!)

1097

1045

String[] parts = type_params[ a ].split( "," );

1098

for( int i = 0; i < parts.length; i++ ) {

1099

String ucpart = parts[ i ].toUpperCase( Locale.US );

1100

if( valid_types.contains( ucpart ) )

1101

types.add( ucpart );

1102

}

1046

for( int i = 0; i < parts.length; i++ )

1047

if( valid_types.contains( parts[ i ] ) )

1048

types.add( parts[ i ] );

1103

1049

}

1104

1050

1105

1051

// get 2.1-style type param

1106

1052

if( _version.equals( "2.1" ) ) {

1107

for( int i = 1; i < params.length; i++ ) {

1108

String ucparam = params[ i ].toUpperCase( Locale.US );

1109

if( valid_types.contains( ucparam ) )

1110

types.add( ucparam );

1111

}

1053

for( int i = 1; i < params.length; i++ )

1054

if( valid_types.contains( params[ i ] ) )

1055

types.add( params[ i ] );

1112

1056

}

1113

1057

1114

1058

return types;

Older »